diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index ba40020782..2fadb9d88a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -359,6 +359,14 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) { List namedExpressions = selectExpressionAnalyzer.analyze(node.getProjectList(), context, new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child)); + + for (UnresolvedExpression expr : node.getProjectList()) { + NestedAnalyzer nestedAnalyzer = new NestedAnalyzer( + namedExpressions, expressionAnalyzer, child + ); + child = nestedAnalyzer.analyze(expr, context); + } + // new context context.push(); TypeEnvironment newEnv = context.peek(); diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index ff3c01d5b8..8a8b3021a4 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -325,17 +325,6 @@ private Expression visitIdentifier(String ident, AnalysisContext context) { ReferenceExpression ref = DSL.ref(ident, typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident))); - // Fall back to old engine too if type is not supported semantically - if (isTypeNotSupported(ref.type())) { - throw new SyntaxCheckException(String.format( - "Identifier [%s] of type [%s] is not supported yet", ident, ref.type())); - } return ref; } - - // Array type is not supporte yet. - private boolean isTypeNotSupported(ExprType type) { - return "array".equalsIgnoreCase(type.typeName()); - } - } diff --git a/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java new file mode 100644 index 0000000000..497a0cefe0 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java @@ -0,0 +1,70 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.analysis; + +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.List; +import java.util.Map; +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Alias; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.planner.logical.LogicalNested; +import org.opensearch.sql.planner.logical.LogicalPlan; + +/** + * Analyze the Nested Function in the {@link AnalysisContext} to construct the {@link + * LogicalPlan}. + */ +@RequiredArgsConstructor +public class NestedAnalyzer extends AbstractNodeVisitor { + private final List namedExpressions; + private final ExpressionAnalyzer expressionAnalyzer; + private final LogicalPlan child; + + public LogicalPlan analyze(UnresolvedExpression projectItem, AnalysisContext context) { + LogicalPlan nested = projectItem.accept(this, context); + return (nested == null) ? child : nested; + } + + @Override + public LogicalPlan visitAlias(Alias node, AnalysisContext context) { + return node.getDelegated().accept(this, context); + } + + @Override + public LogicalPlan visitFunction(Function node, AnalysisContext context) { + if (node.getFuncName().equalsIgnoreCase(BuiltinFunctionName.NESTED.name())) { + + List expressions = node.getFuncArgs(); + ReferenceExpression nestedField = + (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(0), context); + Map args; + if (expressions.size() == 2) { + args = Map.of( + "field", nestedField, + "path", (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(1), context) + ); + } else { + args = Map.of( + "field", (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(0), context), + "path", generatePath(nestedField.toString()) + ); + } + return new LogicalNested(child, List.of(args), namedExpressions); + } + return null; + } + + private ReferenceExpression generatePath(String field) { + return new ReferenceExpression(field.substring(0, field.lastIndexOf(".")), STRING); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 039b6380f7..1c796fabc2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -60,7 +60,6 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; -import org.opensearch.sql.expression.function.BuiltinFunctionName; /** * Class of static methods to create specific node instances. diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index b866dfb794..04d2b046a6 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -624,6 +624,10 @@ public static FunctionExpression xor(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.XOR, expressions); } + public static FunctionExpression nested(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.NESTED, expressions); + } + public static FunctionExpression not(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.NOT, expressions); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index 94bb4e067d..3c5b2af23c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -15,6 +15,7 @@ import lombok.RequiredArgsConstructor; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.env.Environment; @@ -100,7 +101,12 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - final ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + // For array types only first index currently supported. + if (value.type().equals(ExprCoreType.ARRAY)) { + wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); + } + if (!wholePathValue.isMissing() || paths.size() == 1) { return wholePathValue; } else { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 994ddbd559..fa5c5fb251 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -187,6 +187,8 @@ public enum BuiltinFunctionName { STDDEV_POP(FunctionName.of("stddev_pop")), // take top documents from aggregation bucket. TAKE(FunctionName.of("take")), + // Not always an aggregation query + NESTED(FunctionName.of("nested")), /** * Text Functions. diff --git a/core/src/main/java/org/opensearch/sql/expression/function/NestedFunctionResolver.java b/core/src/main/java/org/opensearch/sql/expression/function/NestedFunctionResolver.java new file mode 100644 index 0000000000..042e9e8fdc --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/NestedFunctionResolver.java @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.nested.NestedFunction; + +@RequiredArgsConstructor +public class NestedFunctionResolver + implements FunctionResolver { + + @Getter + private final FunctionName functionName; + + @Override + public Pair resolve(FunctionSignature unresolvedSignature) { + if (!unresolvedSignature.getFunctionName().equals(functionName)) { + throw new SemanticCheckException(String.format("Expected '%s' but got '%s'", + functionName.getFunctionName(), unresolvedSignature.getFunctionName().getFunctionName())); + } + + FunctionBuilder buildFunction = (functionProperties, args) + -> new NestedFunction(functionName, args); + return Pair.of(unresolvedSignature, buildFunction); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 842cf25cd6..02dc453dc1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -40,6 +40,8 @@ public void register(BuiltinFunctionRepository repository) { repository.register(match_phrase_prefix()); repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY)); repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY)); + // Functions supported in SELECT clause + repository.register(nested(BuiltinFunctionName.NESTED)); } private static FunctionResolver match_bool_prefix() { @@ -86,6 +88,11 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new RelevanceFunctionResolver(funcName); } + private static FunctionResolver nested(BuiltinFunctionName nested) { + FunctionName funcName = nested.getName(); + return new NestedFunctionResolver(funcName); + } + public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; diff --git a/core/src/main/java/org/opensearch/sql/expression/nested/NestedFunction.java b/core/src/main/java/org/opensearch/sql/expression/nested/NestedFunction.java new file mode 100644 index 0000000000..0c93034252 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/nested/NestedFunction.java @@ -0,0 +1,39 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.nested; + +import java.util.List; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.OpenSearchFunctions; + +public class NestedFunction extends OpenSearchFunctions.OpenSearchFunction { + private final List arguments; + + /** + * Required argument constructor. + * @param functionName name of the function + * @param arguments a list of expressions + */ + public NestedFunction(FunctionName functionName, List arguments) { + super(functionName, arguments); + this.arguments = arguments; + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + // Only need to resolve field argument which is always first index in member variable. + return valueEnv.resolve(this.arguments.get(0)); + } + + @Override + public ExprType type() { + return this.arguments.get(0).type(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index 4a6d4d8222..ef3cd82cf2 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -11,6 +11,7 @@ import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor; import org.opensearch.sql.planner.logical.LogicalProject; @@ -32,6 +33,7 @@ import org.opensearch.sql.planner.physical.RemoveOperator; import org.opensearch.sql.planner.physical.RenameOperator; import org.opensearch.sql.planner.physical.SortOperator; +import org.opensearch.sql.planner.physical.UnnestOperator; import org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.planner.physical.WindowOperator; import org.opensearch.sql.storage.read.TableScanBuilder; @@ -94,6 +96,11 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { return new EvalOperator(visitChild(node, context), node.getExpressions()); } + @Override + public PhysicalPlan visitUnnest(LogicalNested node, C context) { + return new UnnestOperator(visitChild(node, context), node.getFields()); + } + @Override public PhysicalPlan visitSort(LogicalSort node, C context) { return new SortOperator(visitChild(node, context), node.getSortList()); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java new file mode 100644 index 0000000000..1eed9556bc --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; + +@EqualsAndHashCode(callSuper = true) +@Getter +@ToString +public class LogicalNested extends LogicalPlan { + private final List> fields; + private final List projectList; + + /** + * Constructor of LogicalNested. + * + */ + public LogicalNested( + LogicalPlan childPlan, + List> fields, + List projectList + ) { + super(Collections.singletonList(childPlan)); + this.fields = fields; + this.projectList = projectList; + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitUnnest(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index a192966287..411d9a51be 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -74,6 +74,14 @@ public LogicalPlan highlight(LogicalPlan input, Expression field, return new LogicalHighlight(input, field, arguments); } + + public static LogicalPlan nested( + LogicalPlan input, + List> nestedArgs, + List projectList) { + return new LogicalNested(input, nestedArgs, projectList); + } + public static LogicalPlan remove(LogicalPlan input, ReferenceExpression... fields) { return new LogicalRemove(input, ImmutableSet.copyOf(fields)); } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 9a41072fe7..1635b830b2 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -73,6 +73,10 @@ public R visitEval(LogicalEval plan, C context) { return visitNode(plan, context); } + public R visitUnnest(LogicalNested plan, C context) { + return visitNode(plan, context); + } + public R visitSort(LogicalSort plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java index 70847b869b..8e007272bf 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java @@ -14,6 +14,7 @@ import java.util.stream.Collectors; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.optimizer.rule.MergeFilterAndFilter; +import org.opensearch.sql.planner.optimizer.rule.MergeNestedAndNested; import org.opensearch.sql.planner.optimizer.rule.PushFilterUnderSort; import org.opensearch.sql.planner.optimizer.rule.read.CreateTableScanBuilder; import org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown; @@ -51,11 +52,13 @@ public static LogicalPlanOptimizer create() { * Phase 2: Transformations that rely on data source push down capability */ new CreateTableScanBuilder(), + new MergeNestedAndNested(), TableScanPushDown.PUSH_DOWN_FILTER, TableScanPushDown.PUSH_DOWN_AGGREGATION, TableScanPushDown.PUSH_DOWN_SORT, TableScanPushDown.PUSH_DOWN_LIMIT, TableScanPushDown.PUSH_DOWN_HIGHLIGHT, + TableScanPushDown.PUSH_DOWN_NESTED, TableScanPushDown.PUSH_DOWN_PROJECT, new CreateTableWriteBuilder())); } diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java index 856d8df7ea..8f5ac86580 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java @@ -16,6 +16,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalRelation; @@ -65,6 +66,13 @@ public static Pattern highlight(Patter return Pattern.typeOf(LogicalHighlight.class).with(source(pattern)); } + /** + * Logical nested operator with a given pattern on inner field. + */ + public static Pattern nested(Pattern pattern) { + return Pattern.typeOf(LogicalNested.class).with(source(pattern)); + } + /** * Logical project operator with a given pattern on inner field. */ diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/MergeNestedAndNested.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/MergeNestedAndNested.java new file mode 100644 index 0000000000..c15851acc5 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/MergeNestedAndNested.java @@ -0,0 +1,57 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.optimizer.rule; + +import static com.facebook.presto.matching.Pattern.typeOf; +import static org.opensearch.sql.planner.optimizer.pattern.Patterns.source; + +import com.facebook.presto.matching.Capture; +import com.facebook.presto.matching.Captures; +import com.facebook.presto.matching.Pattern; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import lombok.Getter; +import lombok.experimental.Accessors; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.planner.logical.LogicalNested; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.optimizer.Rule; + +/** + * Merge Nested --> Nested to the single Nested condition. + */ +public class MergeNestedAndNested implements Rule { + + private final Capture capture; + + @Accessors(fluent = true) + @Getter + private final Pattern pattern; + + /** + * Constructor of MergeNestedAndNested. + */ + public MergeNestedAndNested() { + this.capture = Capture.newCapture(); + this.pattern = typeOf(LogicalNested.class) + .with(source().matching(typeOf(LogicalNested.class).capturedAs(capture))); + } + + @Override + public LogicalPlan apply(LogicalNested nested, + Captures captures) { + LogicalNested childNested = captures.get(capture); + + List> combinedArgs = new ArrayList<>(); + combinedArgs.addAll(nested.getFields()); + combinedArgs.addAll(childNested.getFields()); + + return new LogicalNested( + childNested.getChild().get(0), + combinedArgs, childNested.getProjectList()); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index 556a12bb34..de2b47d403 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.planner.optimizer.pattern.Patterns.filter; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.highlight; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.limit; +import static org.opensearch.sql.planner.optimizer.pattern.Patterns.nested; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.project; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.scanBuilder; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.sort; @@ -74,6 +75,11 @@ public class TableScanPushDown implements Rule { scanBuilder())) .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); + public static final Rule PUSH_DOWN_NESTED = + match( + nested( + scanBuilder())) + .apply((nested, scanBuilder) -> scanBuilder.pushDownNested(nested)); /** Pattern that matches a plan node. */ private final WithPattern pattern; diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index e6e59990c8..4826196cb2 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -11,6 +11,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import lombok.experimental.UtilityClass; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -105,4 +106,11 @@ public ValuesOperator values(List... values) { public static LimitOperator limit(PhysicalPlan input, Integer limit, Integer offset) { return new LimitOperator(input, limit, offset); } + + public static UnnestOperator unnest( + PhysicalPlan input, + Set args, + Map> groupedFieldsByPath) { + return new UnnestOperator(input, args, groupedFieldsByPath); + } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index d4bc4a1ea9..e404d5f0ee 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -57,6 +57,10 @@ public R visitEval(EvalOperator node, C context) { return visitNode(node, context); } + public R visitUnnest(UnnestOperator node, C context) { + return visitNode(node, context); + } + public R visitDedupe(DedupeOperator node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/UnnestOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/UnnestOperator.java new file mode 100644 index 0000000000..764e13569f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/UnnestOperator.java @@ -0,0 +1,316 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.data.model.ExprCollectionValue; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.ReferenceExpression; + +/** + * The UnnestOperator evaluates the {@link UnnestOperator#fields} and + * generates {@link UnnestOperator#nonNestedFields} to form the + * {@link UnnestOperator#result} output. Resolve two nested fields + * with differing paths will result in a cartesian product(inner join). + */ +@EqualsAndHashCode(callSuper = false) +public class UnnestOperator extends PhysicalPlan { + @Getter + private final PhysicalPlan input; + @Getter + private final Set fields; // Needs to be a Set to match legacy implementation + @Getter + private final Map> groupedPathsAndFields; + @EqualsAndHashCode.Exclude + private List> result = new ArrayList<>(); + @EqualsAndHashCode.Exclude + private List nonNestedFields = new ArrayList<>(); + @EqualsAndHashCode.Exclude + private ListIterator> flattenedResult = result.listIterator(); + + /** + * Constructor for UnnestOperator with list of map as arg. + * @param input : PhysicalPlan input. + * @param fields : List of all fields and paths for nested fields. + */ + public UnnestOperator(PhysicalPlan input, List> fields) { + this.input = input; + this.fields = fields.stream() + .map(m -> m.get("field").toString()) + .collect(Collectors.toSet()); + this.groupedPathsAndFields = fields.stream().collect( + Collectors.groupingBy( + m -> m.get("path").toString(), + mapping( + m -> m.get("field").toString(), + toList() + ) + ) + ); + } + + /** + * Constructor for UnnestOperator with Set of fields. + * @param input : PhysicalPlan input. + * @param fields : List of all fields for nested fields. + * @param groupedPathsAndFields : Map of fields grouped by their path. + */ + public UnnestOperator( + PhysicalPlan input, + Set fields, + Map> groupedPathsAndFields + ) { + this.input = input; + this.fields = fields; + this.groupedPathsAndFields = groupedPathsAndFields; + } + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitUnnest(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + return input.hasNext() || flattenedResult.hasNext(); + } + + + @Override + public ExprValue next() { + if (!flattenedResult.hasNext()) { + result.clear(); + nonNestedFields.clear(); + + ExprValue inputValue = input.next(); + generateNonNestedFieldsMap(inputValue); + // Add all nested fields to result map + for (String field : fields) { + result = flatten(field, inputValue, result, true); + } + + // Add all non-nested fields to result map + for (String nonNestedField : nonNestedFields) { + result = flatten(nonNestedField, inputValue, result, false); + } + + if (result.isEmpty()) { + return new ExprTupleValue(new LinkedHashMap<>()); + } + + flattenedResult = result.listIterator(); + } + return new ExprTupleValue(new LinkedHashMap<>(flattenedResult.next())); + } + + /** + * Generate list of non-nested fields that are in inputMap, but not in the member variable + * fields list. + * @param inputMap : Row to parse non-nested fields. + */ + public void generateNonNestedFieldsMap(ExprValue inputMap) { + + for (Map.Entry inputField : inputMap.tupleValue().entrySet()) { + boolean foundNestedField = + this.fields.stream().anyMatch( + field -> field.split("\\.")[0].equalsIgnoreCase(inputField.getKey()) + ); + + if (!foundNestedField) { + boolean nestingComplete = false; + String nonNestedField = inputField.getKey(); + ExprValue currentObj = inputField.getValue(); + while (!nestingComplete) { + if (currentObj instanceof ExprTupleValue) { + var it = currentObj.tupleValue().entrySet().iterator(); + if (it.hasNext()) { + var next = it.next(); + currentObj = next.getValue(); + nonNestedField += "." + next.getKey(); + } else { + nestingComplete = true; + nonNestedField = null; + } + } else if (currentObj instanceof ExprCollectionValue) { + currentObj = currentObj.collectionValue().get(0); + } else { + nestingComplete = true; + } + } + + if (nonNestedField != null) { + this.nonNestedFields.add(nonNestedField); + } + } + } + } + + + /** + * Simplifies the structure of row's source Map by flattening it, + * making the full path of an object the key + * and the Object it refers to the value. + * + *

Sample input: + * keys = ['comments.likes'] + * row = comments: { + * likes: 2 + * } + * + *

Return: + * flattenedRow = {comment.likes: 2} + * + * @param nestedField : Field to query in row. + * @param row : Row returned from OS. + * @param prevList : List of previous nested calls. + * @param supportArrays : When false we do not need to execute a cross join. + * @return : List of nested select items or cartesian product of nested calls. + */ + private List> flatten( + String nestedField, + ExprValue row, + List> prevList, + boolean supportArrays + ) { + List> copy = new ArrayList<>(); + List> newList = new ArrayList<>(); + + ExprValue nestedObj = null; + getNested(nestedField, nestedField, row, copy, nestedObj, supportArrays); + + // Only one field in select statement + if (prevList.size() == 0) { + return copy; + } + + if (!supportArrays || containSamePath(copy.get(0))) { + var resultIt = this.result.iterator(); + Map resultVal = resultIt.next(); + var copyIt = copy.iterator(); + Map copyVal = copyIt.next(); + for (int i = 0; i < this.result.size(); i++) { + resultVal.putAll(copyVal); + if (copyIt.hasNext()) { + copyVal = copyIt.next(); + } + if (resultIt.hasNext()) { + resultVal = resultIt.next(); + } + } + return this.result; + } else { + // Generate cartesian product + for (Map prevMap : prevList) { + for (Map newMap : copy) { + newList.add(Stream.of(newMap, prevMap) + .flatMap(map -> map.entrySet().stream()) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue))); + } + } + return newList; + } + } + + /** + * Check if newMap field has any sharing paths in prevMap. + * @param newMap : New map to add to result set. + * @return : true if there is already a field added to result set with same path. + */ + boolean containSamePath(Map newMap) { + String newKey = newMap.keySet().iterator().next(); + Map resultMap = this.result.iterator().next(); + for (var entry : this.groupedPathsAndFields.entrySet()) { + if (entry.getValue().contains(newKey)) { + for (var map : resultMap.entrySet()) { + if (entry.getValue().contains(map.getKey())) { + return true; + } + } + } + } + return false; + } + + + /** + * Retrieve nested field(s) in row. + * + * @param field : Path for nested field. + * @param nestedField : Current level to nested field path. + * @param row : Row to resolve nested field. + * @param ret : List to add nested field to. + * @param nestedObj : Object at current nested level. + * @param supportArrays : Only first index of arrays is supports when false. + * @return : Object at current nested level. + */ + private void getNested( + String field, String nestedField, ExprValue row, + List> ret, ExprValue nestedObj, + boolean supportArrays + ) { + ExprValue currentObj = (nestedObj == null) ? row : nestedObj; + String[] splitKeys = nestedField.split("\\."); + + if (currentObj instanceof ExprTupleValue) { + ExprTupleValue currentMap = (ExprTupleValue) currentObj; + if (currentMap.tupleValue().containsKey(splitKeys[0])) { + currentObj = currentMap.tupleValue().get(splitKeys[0]); + } else { + currentObj = null; + } + } else if (currentObj instanceof ExprCollectionValue) { + ExprValue arrayObj = currentObj; + if (supportArrays) { + for (int x = 0; x < arrayObj.collectionValue().size(); x++) { + currentObj = arrayObj.collectionValue().get(x); + getNested(field, nestedField, row, ret, currentObj, supportArrays); + currentObj = null; + } + } else { // TODO remove when arrays are supported. + currentObj = arrayObj.collectionValue().get(0); + getNested(field, nestedField, row, ret, currentObj, supportArrays); + currentObj = null; + } + } else { + currentObj = null; + } + + // Return final nested result + if (currentObj != null + && (StringUtils.substringAfterLast(field, ".").equals(nestedField) + || !field.contains(".")) + ) { + ret.add(new HashMap<>(Map.of(field, currentObj))); + } else if (currentObj != null) { + getNested(field, nestedField.substring(nestedField.indexOf(".") + 1), + row, ret, currentObj, supportArrays); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java index c0fdf36e70..9af66e219f 100644 --- a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java +++ b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java @@ -10,6 +10,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor; import org.opensearch.sql.planner.logical.LogicalProject; @@ -104,6 +105,17 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return false; } + /** + * Can a given nested operator be pushed down to table scan builder. Assume no such support + * by default unless subclass override this. + * + * @param nested logical nested operator + * @return true if pushed down, otherwise false + */ + public boolean pushDownNested(LogicalNested nested) { + return false; + } + @Override public R accept(LogicalPlanNodeVisitor visitor, C context) { return visitor.visitTableScanBuilder(this, context); diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 1db29a6a42..820ebfc54d 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -36,6 +36,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.utils.MLCommonsConstants.ACTION; import static org.opensearch.sql.utils.MLCommonsConstants.ALGO; @@ -79,10 +80,14 @@ import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.HighlightExpression; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.planner.logical.LogicalAD; import org.opensearch.sql.planner.logical.LogicalMLCommons; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; import org.opensearch.sql.planner.logical.LogicalProject; @@ -367,6 +372,120 @@ public void project_source() { AstDSL.alias("double_value", AstDSL.field("double_value")))); } + @Test + public void project_nested_field_arg() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info", + function("nested", qualifiedName("message", "info")), null) + ) + ); + } + + @Test + public void project_nested_field_and_path_args() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info", + function( + "nested", + qualifiedName("message", "info"), + qualifiedName("message") + ), + null + ) + ) + ); + } + + @Test + public void project_nested_deep_field_arg() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info.id", STRING), + "path", new ReferenceExpression("message.info", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info.id", + DSL.nested(DSL.ref("message.info.id", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info.id", + DSL.nested(DSL.ref("message.info.id", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info.id", + function("nested", qualifiedName("message", "info", "id")), null) + ) + ); + } + @Test public void project_highlight() { Map args = new HashMap<>(); diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index c7a11658e3..2185a45279 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -259,17 +259,6 @@ public void case_clause() { AstDSL.stringLiteral("test")))); } - @Test - public void skip_array_data_type() { - SyntaxCheckException exception = - assertThrows(SyntaxCheckException.class, - () -> analyze(qualifiedName("array_value"))); - assertEquals( - "Identifier [array_value] of type [ARRAY] is not supported yet", - exception.getMessage() - ); - } - @Test public void undefined_var_semantic_check_failed() { SemanticCheckException exception = assertThrows(SemanticCheckException.class, diff --git a/core/src/test/java/org/opensearch/sql/config/TestConfig.java b/core/src/test/java/org/opensearch/sql/config/TestConfig.java index 74dde6c2e9..169832fd80 100644 --- a/core/src/test/java/org/opensearch/sql/config/TestConfig.java +++ b/core/src/test/java/org/opensearch/sql/config/TestConfig.java @@ -56,6 +56,9 @@ public class TestConfig { .put("timestamp_value", ExprCoreType.TIMESTAMP) .put("field_value1", ExprCoreType.STRING) .put("field_value2", ExprCoreType.STRING) + .put("message", ExprCoreType.STRING) + .put("message.info", ExprCoreType.STRING) + .put("message.info.id", ExprCoreType.STRING) .build(); protected StorageEngine storageEngine() { diff --git a/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java b/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java index d3b44fe6a1..46aae069bb 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java @@ -35,6 +35,7 @@ import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTupleValue; @@ -126,6 +127,16 @@ public void innner_none_object_field_contain_dot() { assertEquals(1990, actualValue.integerValue()); } + @Test + public void array_with_multiple_path_value() { + ReferenceExpression expr = new ReferenceExpression("message.info", STRING); + ExprValue actualValue = expr.resolve(tuple()); + + assertEquals(STRING, actualValue.type()); + // Array of object, only first index is used + assertEquals("First message in array", actualValue.stringValue()); + } + /** * { * "name": "bob smith" @@ -140,7 +151,11 @@ public void innner_none_object_field_contain_dot() { * }, * "address.local": { * "state": "WA", - * } + * }, + * "message": [ + * { "info": "message in array" }, + * { "info": "Only first index of array used" } + * ] * } */ private ExprTupleValue tuple() { @@ -151,12 +166,29 @@ private ExprTupleValue tuple() { ExprValueUtils.tupleValue(ImmutableMap.of("year", 2020)); ExprValue addressLocal = ExprValueUtils.tupleValue(ImmutableMap.of("state", "WA")); + ExprValue messageCollectionValue = + new ExprCollectionValue( + ImmutableList.of( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "info", stringValue("First message in array") + ) + ), + ExprValueUtils.tupleValue( + ImmutableMap.of( + "info", stringValue("Only first index of array used") + ) + ) + ) + ); + ExprTupleValue tuple = ExprTupleValue.fromExprValueMap(ImmutableMap.of( "name", new ExprStringValue("bob smith"), "project.year", new ExprIntegerValue(1990), "project", project, "address", address, - "address.local", addressLocal + "address.local", addressLocal, + "message", messageCollectionValue )); return tuple; } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/NestedFunctionResolverTest.java b/core/src/test/java/org/opensearch/sql/expression/function/NestedFunctionResolverTest.java new file mode 100644 index 0000000000..b0121cb88d --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/NestedFunctionResolverTest.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.exception.SemanticCheckException; + +class NestedFunctionResolverTest { + private final FunctionName sampleFuncName = FunctionName.of("sample_function"); + private NestedFunctionResolver resolver; + + @BeforeEach + void setUp() { + resolver = new NestedFunctionResolver(sampleFuncName); + } + + @Test + void resolve_correct_name_test() { + var sig = new FunctionSignature(sampleFuncName, List.of(STRING)); + Pair builderPair = resolver.resolve(sig); + assertEquals(sampleFuncName, builderPair.getKey().getFunctionName()); + } + + @Test + void resolve_invalid_name_test() { + var wrongFuncName = FunctionName.of("wrong_func"); + var sig = new FunctionSignature(wrongFuncName, List.of(STRING)); + Exception exception = assertThrows(SemanticCheckException.class, + () -> resolver.resolve(sig)); + assertEquals("Expected 'sample_function' but got 'wrong_func'", + exception.getMessage()); + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/nested/NestedFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/nested/NestedFunctionTest.java new file mode 100644 index 0000000000..9eacca0d40 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/nested/NestedFunctionTest.java @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.nested; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.data.model.ExprValueUtils.booleanValue; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; +import static org.opensearch.sql.data.model.ExprValueUtils.doubleValue; +import static org.opensearch.sql.data.model.ExprValueUtils.floatValue; +import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; +import static org.opensearch.sql.data.model.ExprValueUtils.longValue; +import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; +import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.ExpressionTestBase; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.env.Environment; + +public class NestedFunctionTest extends ExpressionTestBase { + + private static final ImmutableMap dataMap = ImmutableMap.of( + "message.integer_value", integerValue(1), + "message.long_value", longValue(1L), + "message.float_value", floatValue(1f), + "message.double_value", doubleValue(1d), + "message.boolean_value", booleanValue(true), + "message.string_value", stringValue("str"), + "message.struct_value", tupleValue(ImmutableMap.of("str", 1)), + "message.array_value", collectionValue(ImmutableList.of(1)) + ); + + protected static Environment env() { + return var -> { + return dataMap.get(((ReferenceExpression) var).getAttr()); + }; + } + + static Stream> generateValidData() { + return Stream.of( + dataMap + ); + } + + /** + * Test single parameter nested function parameters. + */ + @Test + public void test_single_param_nested() { + for (var entry : dataMap.entrySet()) { + FunctionExpression nested = DSL.nested(DSL.ref(entry.getKey(), entry.getValue().type())); + assertEquals(entry.getValue(), nested.valueOf(env())); + } + } + + /** + * Test double parameter nested function parameters with second parameter of type STRUCT. + */ + @Test + public void test_double_param_nested_struct() { + for (var entry : dataMap.entrySet()) { + FunctionExpression nested = DSL.nested(DSL.ref(entry.getKey(), entry.getValue().type()), + DSL.ref("message", STRUCT)); + assertEquals(entry.getValue(), nested.valueOf(env())); + } + } + + /** + * Test double parameter nested function parameters with second parameter of type ARRAY. + */ + @Test + public void test_double_param_nested_array() { + for (var entry : dataMap.entrySet()) { + FunctionExpression nested = DSL.nested(DSL.ref(entry.getKey(), entry.getValue().type()), + DSL.ref("message", ARRAY)); + assertEquals(entry.getValue(), nested.valueOf(env())); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 017cfb60ea..96946f484c 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -18,6 +18,7 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.eval; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.rareTopN; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.remove; @@ -31,6 +32,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Test; @@ -99,59 +101,81 @@ public void visitShouldReturnDefaultPhysicalOperator() { ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("name1", STRING)); Integer limit = 1; Integer offset = 1; + List> nestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List nestedProjectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null + ) + ); + Set unnestOperatorArgs = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + LogicalPlan plan = project( - limit( - LogicalPlanDSL.dedupe( - rareTopN( - sort( - eval( - remove( - rename( - aggregation( - filter(values(emptyList()), filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - dedupeField), - limit, - offset), + nested( + limit( + LogicalPlanDSL.dedupe( + rareTopN( + sort( + eval( + remove( + rename( + aggregation( + filter(values(emptyList()), filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + dedupeField), + limit, + offset), + nestedArgs, nestedProjectList), include); PhysicalPlan actual = plan.accept(implementor, null); assertEquals( PhysicalPlanDSL.project( - PhysicalPlanDSL.limit( - PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.rareTopN( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - PhysicalPlanDSL.filter( - PhysicalPlanDSL.values(emptyList()), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - dedupeField), - limit, - offset), + PhysicalPlanDSL.unnest( + PhysicalPlanDSL.limit( + PhysicalPlanDSL.dedupe( + PhysicalPlanDSL.rareTopN( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + PhysicalPlanDSL.filter( + PhysicalPlanDSL.values(emptyList()), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + dedupeField), + limit, + offset), + unnestOperatorArgs, groupedFieldsByPath), include), actual); } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index 341bcbc29e..fe76589066 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -8,12 +8,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.named; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; @@ -29,6 +31,7 @@ import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.window.WindowDefinition; @@ -152,6 +155,21 @@ public TableWriteOperator build(PhysicalPlan child) { assertNull(highlight.accept(new LogicalPlanNodeVisitor() { }, null)); + List> nestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, nestedArgs, projectList); + assertNull(nested.accept(new LogicalPlanNodeVisitor() { + }, null)); + LogicalPlan mlCommons = new LogicalMLCommons(LogicalPlanDSL.relation("schema", table), "kmeans", ImmutableMap.builder() diff --git a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java index 7516aa1809..4ce1788c22 100644 --- a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java @@ -14,10 +14,12 @@ import static org.opensearch.sql.data.model.ExprValueUtils.longValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.aggregation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.relation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.sort; @@ -26,6 +28,7 @@ import com.google.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -38,6 +41,8 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.Table; @@ -81,6 +86,57 @@ void filter_merge_filter() { ); } + /** + * Nested - Nested --> Nested. + */ + @Test + void nested_merge_nested() { + List> firstNestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("other.field", STRING), + "path", new ReferenceExpression("other", STRING) + ) + ); + + List> secondNestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List> combinedNestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ), + Map.of( + "field", new ReferenceExpression("other.field", STRING), + "path", new ReferenceExpression("other", STRING) + ) + ); + + + assertEquals( + nested( + tableScanBuilder, + combinedNestedArgs, + null + ), + optimize( + nested( + nested( + relation("schema", table), + firstNestedArgs, + null + ), + secondNestedArgs, + null + ) + ) + ); + } + /** * Filter - Sort --> Sort - Filter. */ @@ -253,6 +309,25 @@ void table_scan_builder_support_highlight_push_down_can_apply_its_rule() { ); } + @Test + void table_scan_builder_support_nested_push_down_can_apply_its_rule() { + when(tableScanBuilder.pushDownNested(any())).thenReturn(true); + + assertEquals( + tableScanBuilder, + optimize( + nested( + relation("schema", table), + List.of(Map.of("field", new ReferenceExpression("message.info", STRING))), + List.of(new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null)) + ) + ) + ); + } + @Test void table_not_support_scan_builder_should_not_be_impact() { Mockito.reset(table, tableScanBuilder); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index 735b914d3e..09f9ad6e54 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -16,6 +16,9 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -132,6 +135,13 @@ public void test_PhysicalPlanVisitor_should_return_null() { PhysicalPlan limit = PhysicalPlanDSL.limit(plan, 1, 1); assertNull(limit.accept(new PhysicalPlanNodeVisitor() { }, null)); + + Set unnestArgs = Set.of("nested.test"); + Map> groupedFieldsByPath = + Map.of("nested", List.of("nested.test")); + PhysicalPlan unnest = new UnnestOperator(plan, unnestArgs, groupedFieldsByPath); + assertNull(unnest.accept(new PhysicalPlanNodeVisitor() { + }, null)); } @Test diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/UnnestOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/UnnestOperatorTest.java new file mode 100644 index 0000000000..aaf80b32b9 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/UnnestOperatorTest.java @@ -0,0 +1,217 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; +import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.ReferenceExpression; + +@ExtendWith(MockitoExtension.class) +class UnnestOperatorTest extends PhysicalPlanTestBase { + @Mock + private PhysicalPlan inputPlan; + + private final ExprValue testData = tupleValue( + Map.of( + "message", + collectionValue( + List.of( + Map.of("info", "a"), + Map.of("info", "b"), + Map.of("info", "c") + ) + ), + "comment", + collectionValue( + List.of( + Map.of("data", "1"), + Map.of("data", "2"), + Map.of("data", "3") + ) + ) + ) + ); + + private final ExprValue testDataWithSamePath = tupleValue( + Map.of( + "message", + collectionValue( + List.of( + Map.of("info", "a"), + Map.of("info", "b"), + Map.of("info", "c"), + Map.of("id", "1"), + Map.of("id", "2"), + Map.of("id", "3") + ) + ) + ) + ); + + private final ExprValue nonNestedTestData = tupleValue( + Map.of( + "message", "val" + ) + ); + + private final ExprValue missingTupleData = tupleValue( + Map.of( + "tuple", + tupleValue( + Map.of() + ) + ) + ); + + private final ExprValue missingArrayData = tupleValue( + Map.of( + "missing", + collectionValue( + List.of("value") + ) + ) + ); + + @Test + public void nested_one_nested_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testData); + + Set fields = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + assertThat( + execute(new UnnestOperator(inputPlan, fields, groupedFieldsByPath)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message.info", "a", "comment.data", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "b", "comment.data", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "c", "comment.data", "1"))) + ) + ); + } + + @Test + public void nested_two_nested_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testData); + + List> fields = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING)), + Map.of( + "field", new ReferenceExpression("comment.data", STRING), + "path", new ReferenceExpression("comment", STRING)) + ); + + assertThat( + execute(new UnnestOperator(inputPlan, fields)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message.info", "a", "comment.data", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "a", "comment.data", "2"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "a", "comment.data", "3"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "b", "comment.data", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "b", "comment.data", "2"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "b", "comment.data", "3"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "c", "comment.data", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "c", "comment.data", "2"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "c", "comment.data", "3"))) + ) + ); + } + + @Test + public void nested_two_nested_fields_with_same_path() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testDataWithSamePath); + + List> fields = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING)), + Map.of( + "field", new ReferenceExpression("message.id", STRING), + "path", new ReferenceExpression("message", STRING)) + ); + assertThat( + execute(new UnnestOperator(inputPlan, fields)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message.info", "a", "message.id", "1"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "b", "message.id", "2"))), + tupleValue(new LinkedHashMap<>(Map.of("message.info", "c", "message.id", "3"))) + ) + ); + } + + @Test + public void non_nested_field_tests() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(nonNestedTestData); + + Set fields = Set.of("message"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + assertThat( + execute(new UnnestOperator(inputPlan, fields, groupedFieldsByPath)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message", "val"))) + ) + ); + } + + @Test + public void nested_missing_tuple_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(missingTupleData); + Set fields = Set.of("message.val"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.val")); + assertTrue( + execute(new UnnestOperator(inputPlan, fields, groupedFieldsByPath)) + .get(0) + .tupleValue() + .size() == 0 + ); + } + + @Test + public void nested_missing_array_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(missingArrayData); + Set fields = Set.of("missing.data"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.data")); + assertTrue( + execute(new UnnestOperator(inputPlan, fields, groupedFieldsByPath)) + .get(0) + .tupleValue() + .size() == 0 + ); + } +} diff --git a/docs/user/beyond/partiql.rst b/docs/user/beyond/partiql.rst index 6ad93ddeaf..76fec8405d 100644 --- a/docs/user/beyond/partiql.rst +++ b/docs/user/beyond/partiql.rst @@ -202,11 +202,11 @@ Selecting top level for object fields, object fields of array value and nested f os> SELECT city, accounts, projects FROM people; fetched rows / total rows = 1/1 - +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ - | city | accounts | projects | - |-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------| - | {'name': 'Seattle', 'location': {'latitude': 10.5}} | [{'id': 1},{'id': 2}] | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | - +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ + | city | accounts | projects | + |-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------| + | {'name': 'Seattle', 'location': {'latitude': 10.5}} | {'id': 1} | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | + +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ Example 2: Selecting Deeper Levels ---------------------------------- diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 6283bc70e1..17f7959f35 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -4299,6 +4299,27 @@ Another example to show how to set custom values for the optional parameters:: | tEsT wIlDcArD sensitive cases | +-------------------------------------------+ +NESTED +------ + +Description +>>>>>>>>>>> + +``nested(field | [field, path])`` + +The ``nested`` function maps to the ``nested`` query used in search engine. It returns nested field types in documents that match the provided specified field(s). + +Example with ``field`` and ``path`` parameters:: + + os> SELECT nested(message.info, message) FROM nested; + fetched rows / total rows = 2/2 + +----------------+ + | message.info | + |----------------| + | a | + | b | + +----------------+ + System Functions ================ diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 22a635cf38..a02bcf096a 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 8/8 + fetched rows / total rows = 9/9 +----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------| @@ -44,6 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | wildcard | BASE TABLE | null | null | null | null | null | null | diff --git a/doctest/test_data/nested_objects.json b/doctest/test_data/nested_objects.json new file mode 100644 index 0000000000..fc5f56b4c5 --- /dev/null +++ b/doctest/test_data/nested_objects.json @@ -0,0 +1,4 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index c517b2756c..1fedbdf49e 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -27,6 +27,7 @@ BOOKS = "books" APACHE = "apache" WILDCARD = "wildcard" +NESTED = "nested" DATASOURCES = ".ql-datasources" @@ -95,6 +96,7 @@ def set_up_test_indices(test): load_file("books.json", index_name=BOOKS) load_file("apache.json", index_name=APACHE) load_file("wildcard.json", index_name=WILDCARD) + load_file("nested_objects.json", index_name=NESTED) load_file("datasources.json", index_name=DATASOURCES) @@ -124,7 +126,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/nested_objects.json b/doctest/test_mapping/nested_objects.json new file mode 100644 index 0000000000..4f0ed97433 --- /dev/null +++ b/doctest/test_mapping/nested_objects.json @@ -0,0 +1,47 @@ +{ + "mappings": { + "properties": { + "message": { + "type": "nested", + "properties": { + "info": { + "type": "keyword", + "index": "true" + }, + "author": { + "type": "keyword", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "index": "true" + }, + "dayOfWeek": { + "type": "long" + } + } + }, + "comment": { + "type": "nested", + "properties": { + "data": { + "type": "keyword", + "index": "true" + }, + "likes": { + "type": "long" + } + } + }, + "myNum": { + "type": "long" + }, + "someField": { + "type": "keyword", + "index": "true" + } + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java index bddaa22772..b1db21a2ff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java @@ -70,8 +70,7 @@ public void testSelectObjectInnerFields() { public void testSelectNestedFieldItself() { JSONObject response = new JSONObject(query("SELECT projects FROM %s")); - // Nested field is absent in OpenSearch Get Field Mapping response either hence "object" used - verifySchema(response, schema("projects", null, "object")); + verifySchema(response, schema("projects", null, "nested")); // Expect nested field itself is returned in a single cell verifyDataRows(response, diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 360497300e..cb86ed6a11 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -505,6 +505,10 @@ public enum Index { "nestedType", getNestedTypeIndexMapping(), "src/test/resources/nested_objects.json"), + NESTED_WITHOUT_ARRAYS(TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS, + "nestedTypeWithoutArrays", + getNestedTypeIndexMapping(), + "src/test/resources/nested_objects_without_arrays.json"), NESTED_WITH_QUOTES(TestsConstants.TEST_INDEX_NESTED_WITH_QUOTES, "nestedType", getNestedTypeIndexMapping(), @@ -593,11 +597,18 @@ public enum Index { "wildcard", getMappingFile("wildcard_index_mappings.json"), "src/test/resources/wildcard.json"), - DATASOURCES(TestsConstants.DATASOURCES, "datasource", getMappingFile("datasources_index_mappings.json"), - "src/test/resources/datasources.json"); + "src/test/resources/datasources.json"), + MULTI_NESTED(TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE, + "multi_nested", + getMappingFile("multi_nested.json"), + "src/test/resources/multi_nested_objects.json"), + NESTED_WITH_NULLS(TestsConstants.TEST_INDEX_NESTED_WITH_NULLS, + "multi_nested", + getNestedTypeIndexMapping(), + "src/test/resources/nested_with_nulls.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index e46993cd17..c3af98b794 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -31,6 +31,8 @@ public class TestsConstants { public final static String TEST_INDEX_LOCATION = TEST_INDEX + "_location"; public final static String TEST_INDEX_LOCATION2 = TEST_INDEX + "_location2"; public final static String TEST_INDEX_NESTED_TYPE = TEST_INDEX + "_nested_type"; + public final static String TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS = + TEST_INDEX + "_nested_type_without_arrays"; public final static String TEST_INDEX_NESTED_SIMPLE = TEST_INDEX + "_nested_simple"; public final static String TEST_INDEX_NESTED_WITH_QUOTES = TEST_INDEX + "_nested_type_with_quotes"; @@ -55,6 +57,8 @@ public class TestsConstants { public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; + public final static String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; + public final static String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public final static String DATASOURCES = ".ql-datasources"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java new file mode 100644 index 0000000000..bc462db656 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java @@ -0,0 +1,209 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_WITH_NULLS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.List; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.Test; +import org.junit.jupiter.api.Disabled; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class NestedIT extends SQLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.MULTI_NESTED); + loadIndex(Index.NESTED); + loadIndex(Index.NESTED_WITHOUT_ARRAYS); + loadIndex(Index.EMPLOYEE_NESTED); + loadIndex(Index.NESTED_WITH_NULLS); + } + + @Test + public void nested_function_with_array_of_nested_field_test() { + String query = "SELECT nested(message.info), nested(comment.data) FROM " + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("c", "ab"), + rows("a", "ab"), + rows("b", "aa"), + rows("c", "aa"), + rows("a", "ab"), + rows("zz", new JSONArray(List.of("aa", "bb")))); + } + + @Test + public void nested_function_in_select_test() { + String query = "SELECT nested(message.info), nested(comment.data), " + + "nested(message.dayOfWeek) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + + assertEquals(5, result.getInt("total")); + verifySchema(result, + schema("message.info", null, "keyword"), + schema("comment.data", null, "keyword"), + schema("message.dayOfWeek", null, "long")); + verifyDataRows(result, + rows("a", "ab", 1), + rows("b", "aa", 2), + rows("c", "aa", 1), + rows("c", "ab", 4), + rows("zz", "bb", 6)); + } + + // Has to be tested with JSON format when https://github.com/opensearch-project/sql/issues/1317 + // gets resolved + @Disabled // TODO fix me when aggregation is supported + public void nested_function_in_an_aggregate_function_in_select_test() { + String query = "SELECT sum(nested(message.dayOfWeek)) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows(14)); + } + + // TODO Enable me when nested aggregation is supported + @Disabled + public void nested_function_with_arrays_in_an_aggregate_function_in_select_test() { + String query = "SELECT sum(nested(message.dayOfWeek)) FROM " + + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows(19)); + } + + // TODO not currently supported by legacy, should we add implementation in AstBuilder? + @Disabled + public void nested_function_in_a_function_in_select_test() { + String query = "SELECT upper(nested(message.info)) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + + verifyDataRows(result, + rows("A"), + rows("B"), + rows("C"), + rows("C"), + rows("ZZ")); + } + + @Test + public void nested_function_with_array_of_multi_nested_field_test() { + String query = "SELECT nested(message.author.name) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("e"), + rows("f"), + rows("g"), + rows("h"), + rows("p"), + rows("yy")); + } + + @Test + public void nested_function_with_null_and_missing_fields_test() { + String query = "SELECT nested(message.info), nested(comment.data) FROM " + + TEST_INDEX_NESTED_WITH_NULLS; + JSONObject result = executeJdbcRequest(query); + + assertEquals(10, result.getInt("total")); + verifyDataRows(result, + rows(null, "hh"), + rows("b", "aa"), + rows("c", "aa"), + rows("c", "ab"), + rows("a", "ab"), + rows("zz", new JSONArray(List.of("aa", "bb"))), + rows("zz", new JSONArray(List.of("aa", "bb"))), + rows(null, "ee"), + rows("a", "ab"), + rows("rr", new JSONArray(List.of("asdf", "sdfg")))); + } + + @Test + public void nested_function_multiple_fields_with_matched_and_mismatched_paths_test() { + String query = + "SELECT nested(message.author), nested(message.dayOfWeek), nested(message.info), nested(comment.data), " + + "nested(comment.likes) FROM " + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("e", 1, "a", "ab", 3), + rows("f", 2, "b", "aa", 2), + rows("g", 1, "c", "aa", 3), + rows("h", 4, "c", "ab", 1), + rows("i", 5, "a", "ab", 1), + rows("zz", 6, "zz", new JSONArray(List.of("aa", "bb")), 10)); + } + + @Test + public void nested_with_non_nested_type_test() { + String query = "SELECT nested(someField) FROM " + TEST_INDEX_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains( + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"Illegal nested field name: someField\",\n" + + " \"type\": \"IllegalArgumentException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } + + @Test + public void nested_missing_path() { + String query = "SELECT nested(message.invalid) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains("" + + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"can't resolve Symbol(namespace=FIELD_NAME, name=message.invalid) in type env\",\n" + + " \"type\": \"SemanticCheckException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } + + @Test + public void nested_missing_path_argument() { + String query = "SELECT nested(message.author.name, invalid) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains("" + + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env\",\n" + + " \"type\": \"SemanticCheckException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/multi_nested.json b/integ-test/src/test/resources/indexDefinitions/multi_nested.json new file mode 100644 index 0000000000..d2da21d24c --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/multi_nested.json @@ -0,0 +1,42 @@ +{ + "mappings": { + "properties": { + "message": { + "type": "nested", + "properties": { + "info": { + "type": "keyword" + }, + "author": { + "type": "nested", + "properties": { + "name": { + "type": "keyword", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "address": { + "type": "nested", + "properties": { + "street": { + "type": "keyword" + }, + "number": { + "type": "integer" + } + } + } + } + }, + "dayOfWeek": { + "type": "long" + } + } + } + } + } +} diff --git a/integ-test/src/test/resources/multi_nested_objects.json b/integ-test/src/test/resources/multi_nested_objects.json new file mode 100644 index 0000000000..f5838fe90b --- /dev/null +++ b/integ-test/src/test/resources/multi_nested_objects.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":{"name": "e", "address": {"street": "bc", "number": 1}},"dayOfWeek":1}} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":{"name": "f", "address": {"street": "ab", "number": 2}},"dayOfWeek":2}} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":{"name": "g", "address": {"street": "sk", "number": 3}},"dayOfWeek":1}} +{"index":{"_id":"4"}} +{"message":[{"info":"d","author":{"name": "h", "address": {"street": "mb", "number": 4}},"dayOfWeek":4},{"info":"i","author":{"name": "p", "address": {"street": "on", "number": 5}},"dayOfWeek":5}]} +{"index":{"_id":"5"}} +{"message": [{"info":"zz","author":{"name": "yy", "address": {"street": "qc", "number": 6}},"dayOfWeek":6}]} diff --git a/integ-test/src/test/resources/nested_objects_without_arrays.json b/integ-test/src/test/resources/nested_objects_without_arrays.json new file mode 100644 index 0000000000..626e63e079 --- /dev/null +++ b/integ-test/src/test/resources/nested_objects_without_arrays.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":"g","dayOfWeek":1},"comment":{"data":"aa","likes":3},"myNum":3,"someField":"a"} +{"index":{"_id":"4"}} +{"message":{"info":"c","author":"h","dayOfWeek":4},"comment":{"data":"ab","likes":1},"myNum":4,"someField":"b"} +{"index":{"_id":"5"}} +{"message": {"info":"zz","author":"zz","dayOfWeek":6},"comment":{"data":"bb","likes":10},"myNum":3,"someField":"a"} diff --git a/integ-test/src/test/resources/nested_with_nulls.json b/integ-test/src/test/resources/nested_with_nulls.json new file mode 100644 index 0000000000..b02a8ab110 --- /dev/null +++ b/integ-test/src/test/resources/nested_with_nulls.json @@ -0,0 +1,24 @@ +{"index":{"_id":"1"}} +{"message":{"author":"e","dayOfWeek":5},"comment":{"data":"hh","likes":5},"myNum":7,"someField":"a"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":"g","dayOfWeek":1},"comment":{"data":"aa","likes":3},"myNum":3,"someField":"a"} +{"index":{"_id":"4"}} +{"message":[{"info":"c","author":"h","dayOfWeek":4},{"info":"a","author":"i","dayOfWeek":5}],"comment":{"data":"ab","likes":1},"myNum":4,"someField":"b"} +{"index":{"_id":"5"}} +{"message": [{"info":"zz","author":"zz","dayOfWeek":6}],"comment":{"data":["aa","bb"],"likes":10},"myNum":[3,4],"someField":"a"} +{"index":{"_id":"7"}} +{"message":[{"info":"zz", "author":"z\"z", "dayOfWeek":6}], "comment":{"data":["aa","bb"], "likes":10}, "myNum":[3,4], "someField":"a"} +{"index":{"_id":"8"}} +{"message":{"info":null,"author":"e","dayOfWeek":7},"comment":{"data":"ee","likes":6},"myNum":6,"someField":"a"} +{"index":{"_id":"9"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"10"}} +{"message":[{"info":"rr", "author":"this \"value\" contains quotes", "dayOfWeek":3}], "comment":{"data":["asdf","sdfg"], "likes":56}, "myNum":[1,2,4], "someField":"ert"} +{"index":{"_id":"11"}} +{"comment":{"data":"jj","likes":1},"myNum":8,"someField":"a"} +{"index":{"_id":"12"}} +{"message":null,"comment":{"data":"kk","likes":0},"myNum":9,"someField":"a"} +{"index":{"_id":"13"}} +{} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 034f9227ee..0c4548a368 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -15,8 +15,10 @@ import static org.opensearch.sql.utils.DateTimeFormatters.DATE_TIME_FORMATTER; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; import java.time.Instant; import java.time.format.DateTimeParseException; import java.util.ArrayList; @@ -234,15 +236,20 @@ private ExprValue parseStruct(Content content, String prefix) { */ private ExprValue parseArray(Content content, String prefix) { List result = new ArrayList<>(); - content.array().forEachRemaining(v -> { - // ExprCoreType.ARRAY does not indicate inner elements type. OpenSearch nested will be an - // array of structs, otherwise parseArray currently only supports array of strings. - if (v.isString()) { - result.add(parse(v, prefix, Optional.of(OpenSearchDataType.of(STRING)))); - } else { - result.add(parse(v, prefix, Optional.of(STRUCT))); - } - }); + // ExprCoreType.ARRAY does not indicate inner elements type. + if (Iterators.size(content.array()) == 1 && content.objectValue() instanceof JsonNode) { + result.add(parse(content, prefix, Optional.of(STRUCT))); + } else { + content.array().forEachRemaining(v -> { + // ExprCoreType.ARRAY does not indicate inner elements type. OpenSearch nested will be an + // array of structs, otherwise parseArray currently only supports array of strings. + if (v.isString()) { + result.add(parse(v, prefix, Optional.of(OpenSearchDataType.of(STRING)))); + } else { + result.add(parse(v, prefix, Optional.of(STRUCT))); + } + }); + } return new ExprCollectionValue(result); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index f06ecb8576..93ea752984 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -22,6 +22,7 @@ import org.opensearch.sql.planner.physical.RemoveOperator; import org.opensearch.sql.planner.physical.RenameOperator; import org.opensearch.sql.planner.physical.SortOperator; +import org.opensearch.sql.planner.physical.UnnestOperator; import org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.planner.physical.WindowOperator; import org.opensearch.sql.storage.TableScanOperator; @@ -87,6 +88,15 @@ public PhysicalPlan visitEval(EvalOperator node, Object context) { return new EvalOperator(visitInput(node.getInput(), context), node.getExpressionList()); } + @Override + public PhysicalPlan visitUnnest(UnnestOperator node, Object context) { + return doProtect( + new UnnestOperator( + visitInput(node.getInput(), context), node.getFields(), node.getGroupedPathsAndFields() + ) + ); + } + @Override public PhysicalPlan visitDedupe(DedupeOperator node, Object context) { return new DedupeOperator(visitInput(node.getInput(), context), node.getDedupeList(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 97aeee3747..96587e1a02 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -6,10 +6,14 @@ package org.opensearch.sql.opensearch.request; +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; +import static org.opensearch.index.query.QueryBuilders.boolQuery; +import static org.opensearch.index.query.QueryBuilders.matchAllQuery; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; -import com.google.common.collect.Lists; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -19,14 +23,17 @@ import lombok.Getter; import lombok.ToString; import org.apache.commons.lang3.tuple.Pair; +import org.apache.lucene.search.join.ScoreMode; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.InnerHitBuilder; +import org.opensearch.index.query.NestedQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; -import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.sql.ast.expression.Literal; @@ -37,6 +44,7 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; +import org.opensearch.sql.planner.logical.LogicalNested; /** * OpenSearch search request builder. @@ -237,4 +245,72 @@ private boolean isSortByDocOnly() { } return false; } + + /** + * Push down nested to sourceBuilder. + * @param nestedArgs : Nested arguments to push down. + */ + public void pushDownNested(List> nestedArgs) { + initBoolQueryFilter(); + groupFieldNamesByPath(nestedArgs).forEach( + (path, fieldNames) -> buildInnerHit( + fieldNames, createEmptyNestedQuery(path) + ) + ); + } + + /** + * Initialize bool query for push down. + */ + private void initBoolQueryFilter() { + sourceBuilder.query(QueryBuilders.boolQuery()); + query().filter(boolQuery()); + } + + /** + * Map all field names in nested queries that use same path. + * @param fields : Fields for nested queries. + * @return : Map of path and associated field names. + */ + private Map> groupFieldNamesByPath( + List> fields) { + // TODO filter out reverse nested when supported - .filter(not(isReverseNested())) + return fields.stream().collect( + Collectors.groupingBy( + m -> m.get("path").toString(), + mapping( + m -> m.get("field").toString(), + toList() + ) + ) + ); + } + + /** + * Build inner hits portion to nested query. + * @param paths : Set of all paths used in nested queries. + * @param query : Current pushDown query. + */ + private void buildInnerHit(List paths, NestedQueryBuilder query) { + query.innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, paths.toArray(new String[0]), null) + )); + } + + /** + * Create a nested query with match all filter to place inner hits. + */ + private NestedQueryBuilder createEmptyNestedQuery(String path) { + NestedQueryBuilder nestedQuery = nestedQuery(path, matchAllQuery(), ScoreMode.None); + ((BoolQueryBuilder) query().filter().get(0)).must(nestedQuery); + return nestedQuery; + } + + /** + * Return current query. + * @return : Current source builder query. + */ + private BoolQueryBuilder query() { + return (BoolQueryBuilder) sourceBuilder.query(); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index aadd73efdd..bceb96836e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -96,7 +96,12 @@ public Iterator iterator() { .map(hit -> { ExprValue docData = exprValueFactory.construct(hit.getSourceAsString()); if (hit.getHighlightFields().isEmpty()) { - return docData; + if (hit.getInnerHits() == null) { + return docData; + } else { + Map rowSource = hit.getSourceAsMap(); + return ExprValueUtils.tupleValue(rowSource); + } } else { ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); builder.putAll(docData.tupleValue()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index d7483cfcf0..8e6c57d7d5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -13,6 +13,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalSort; import org.opensearch.sql.storage.TableScanOperator; @@ -97,6 +98,11 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return delegate.pushDownHighlight(highlight); } + @Override + public boolean pushDownNested(LogicalNested nested) { + return delegate.pushDownNested(nested); + } + private boolean sortByFieldsOnly(LogicalSort sort) { return sort.getSortList().stream() .map(sortItem -> sortItem.getRight() instanceof ReferenceExpression) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index 7190d58000..80befce0da 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -27,6 +27,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalSort; import org.opensearch.sql.storage.TableScanOperator; @@ -99,6 +100,15 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return true; } + @Override + public boolean pushDownNested(LogicalNested nested) { + indexScan.getRequestBuilder().pushDownNested(nested.getFields()); + indexScan.getRequestBuilder().pushDownProjects( + findReferenceExpressions(nested.getProjectList())); + // Return false intentionally to keep the original nested operator + return false; + } + /** * Find reference expression from expression. * @param expressions a list of expression. diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index 1c79a28f3f..a91702d7f4 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -309,6 +309,7 @@ void search() { new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); + when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue); // Mock second scroll request followed diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index f2da6fd1e0..7fc7f92a93 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -290,6 +290,7 @@ void search() throws IOException { new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); + when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue); // Mock second scroll request followed diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 1b9a8b7e65..8f2c954f65 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -34,6 +34,8 @@ import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.time.Instant; @@ -94,6 +96,23 @@ public void constructNullValue() { assertTrue(new OpenSearchJsonContent(null).isNull()); } + @Test + public void iterateArrayValue() throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + var arrayIt = new OpenSearchJsonContent(mapper.readTree("[\"zz\",\"bb\"]")).array(); + assertTrue(arrayIt.next().stringValue().equals("zz")); + assertTrue(arrayIt.next().stringValue().equals("bb")); + assertTrue(!arrayIt.hasNext()); + } + + @Test + public void iterateArrayValueWithOneElement() throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + var arrayIt = new OpenSearchJsonContent(mapper.readTree("[\"zz\"]")).array(); + assertTrue(arrayIt.next().stringValue().equals("zz")); + assertTrue(!arrayIt.hasNext()); + } + @Test public void constructNullArrayValue() { assertEquals(nullValue(), tupleValue("{\"intV\":[]}").get("intV")); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index 857ff601e1..c68b971c72 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -28,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -61,6 +62,7 @@ import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; +import org.opensearch.sql.planner.physical.UnnestOperator; @ExtendWith(MockitoExtension.class) class OpenSearchExecutionProtectorTest { @@ -104,10 +106,8 @@ public void testProtectIndexScan() { ImmutableMap.of(ref("name", STRING), ref("lastname", STRING)); Pair newEvalField = ImmutablePair.of(ref("name1", STRING), ref("name", STRING)); - Integer sortCount = 100; Pair sortField = ImmutablePair.of(DEFAULT_ASC, ref("name1", STRING)); - Integer size = 200; Integer limit = 10; Integer offset = 10; @@ -314,6 +314,21 @@ public void testVisitML() { executionProtector.visitML(mlOperator, null)); } + @Test + public void testVisitUnnest() { + Set args = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + UnnestOperator unnestOperator = + new UnnestOperator( + values(emptyList()), + args, + groupedFieldsByPath); + + assertEquals(executionProtector.doProtect(unnestOperator), + executionProtector.visitUnnest(unnestOperator, values(emptyList()))); + } + PhysicalPlan resourceMonitor(PhysicalPlan input) { return new ResourceMonitorPlan(input, resourceMonitor); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java index 980d68ed80..4a5fbd4873 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java @@ -9,39 +9,47 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.index.query.QueryBuilders.matchAllQuery; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Pair; +import org.apache.lucene.search.join.ScoreMode; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.InnerHitBuilder; +import org.opensearch.index.query.NestedQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; +import org.opensearch.sql.planner.logical.LogicalNested; @ExtendWith(MockitoExtension.class) public class OpenSearchRequestBuilderTest { @@ -214,6 +222,69 @@ void testPushDownProject() { requestBuilder.getSourceBuilder()); } + @Test + void testPushDownNested() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + requestBuilder.pushDownNested(nested.getFields()); + + NestedQueryBuilder nestedQuery = nestedQuery("message", matchAllQuery(), ScoreMode.None) + .innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, new String[]{"message.info"}, null))); + + assertEquals( + new SearchSourceBuilder() + .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT), + requestBuilder.getSourceBuilder()); + } + + @Test + void testPushDownMultipleNestedWithSamePath() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ), + Map.of( + "field", new ReferenceExpression("message.from", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null), + new NamedExpression("message.from", DSL.nested(DSL.ref("message.from", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + requestBuilder.pushDownNested(nested.getFields()); + + NestedQueryBuilder nestedQuery = nestedQuery("message", matchAllQuery(), ScoreMode.None) + .innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, new String[]{"message.info", "message.from"}, null))); + assertEquals( + new SearchSourceBuilder() + .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT), + requestBuilder.getSourceBuilder()); + } + @Test void testPushTypeMapping() { Map typeMapping = Map.of("intA", OpenSearchDataType.of(INTEGER)); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java index 0a60503415..ebe7445319 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java @@ -101,6 +101,8 @@ void iterator() { when(searchHit1.getSourceAsString()).thenReturn("{\"id1\", 1}"); when(searchHit2.getSourceAsString()).thenReturn("{\"id1\", 2}"); + when(searchHit1.getInnerHits()).thenReturn(null); + when(searchHit2.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue1).thenReturn(exprTupleValue2); int i = 0; @@ -116,6 +118,31 @@ void iterator() { } } + @Test + void iterator_with_inner_hits() { + when(searchResponse.getHits()) + .thenReturn( + new SearchHits( + new SearchHit[] {searchHit1}, + new TotalHits(2L, TotalHits.Relation.EQUAL_TO), + 1.0F)); + when(searchHit1.getSourceAsString()).thenReturn("{\"id1\", 1}"); + when(searchHit1.getSourceAsMap()).thenReturn(Map.of("id1", 1)); + when(searchHit1.getInnerHits()).thenReturn( + Map.of( + "innerHit", + new SearchHits( + new SearchHit[] {searchHit1}, + new TotalHits(2L, TotalHits.Relation.EQUAL_TO), + 1.0F))); + + when(factory.construct(any())).thenReturn(exprTupleValue1); + + for (ExprValue hit : new OpenSearchResponse(searchResponse, factory)) { + assertEquals(exprTupleValue1, hit); + } + } + @Test void response_is_aggregation_when_aggregation_not_empty() { when(searchResponse.getAggregations()).thenReturn(aggregations); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index b90ca8836d..389ac6e280 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.relation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.sort; @@ -29,6 +30,7 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_FILTER; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; +import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_NESTED; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; @@ -60,6 +62,7 @@ import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.HighlightExpression; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; @@ -68,6 +71,7 @@ import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer; import org.opensearch.sql.planner.optimizer.rule.read.CreateTableScanBuilder; @@ -246,6 +250,39 @@ void test_highlight_push_down() { ); } + @Test + void test_nested_push_down() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + + assertEqualsAfterOptimization( + project( + nested( + indexScanBuilder( + withNestedPushedDown(nested.getFields())), args, projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ), + project( + nested( + relation("schema", table), args, projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ) + ); + } + /** * SELECT avg(intV) FROM schema WHERE intV = 1 GROUP BY string_value. */ @@ -577,6 +614,10 @@ private Runnable withHighlightPushedDown(String field, Map argu return () -> verify(requestBuilder, times(1)).pushDownHighlight(field, arguments); } + private Runnable withNestedPushedDown(List> fields) { + return () -> verify(requestBuilder, times(1)).pushDownNested(fields); + } + private static AggregationAssertHelper.AggregationAssertHelperBuilder aggregate(String aggName) { var aggBuilder = new AggregationAssertHelper.AggregationAssertHelperBuilder(); aggBuilder.aggregateName = aggName; @@ -607,6 +648,7 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, PUSH_DOWN_HIGHLIGHT, + PUSH_DOWN_NESTED, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index cf2aa392de..e3a8349328 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -312,6 +312,7 @@ functionCall | extractFunction #extractFunctionCall | getFormatFunction #getFormatFunctionCall | timestampFunction #timestampFunctionCall + | nestedFunction #nestedFunctionCall ; timestampFunction @@ -369,6 +370,22 @@ datetimePart | complexDateTimePart ; +nestedFunction + : NESTED LR_BRACKET + (nestedField | nestedField COMMA nestedPath) + RR_BRACKET + ; + +nestedField + : nestedIdent DOT nestedIdent (DOT nestedIdent)* + | stringLiteral + ; + +nestedPath + : nestedIdent (DOT nestedIdent)* + | stringLiteral + ; + highlightFunction : HIGHLIGHT LR_BRACKET relevanceField (COMMA highlightArg)* RR_BRACKET ; @@ -686,10 +703,18 @@ ident | scalarFunctionName ; + +nestedIdent + : ID + | BACKTICK_QUOTE_ID + | keywordsCanBeId + | scalarFunctionName + ; + keywordsCanBeId : FULL | FIELD | D | T | TS // OD SQL and ODBC special - | COUNT | SUM | AVG | MAX | MIN + | COUNT | SUM | AVG | MAX | MIN | NESTED | FIRST | LAST | TYPE // TODO: Type is keyword required by relevancy function. Remove this when relevancy functions moved out ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 02a7f7d1b9..1f12c80f78 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -198,7 +198,17 @@ private UnresolvedExpression visitSelectItem(SelectElementContext ctx) { String name = StringUtils.unquoteIdentifier(getTextInQuery(ctx.expression(), query)); UnresolvedExpression expr = visitAstExpression(ctx.expression()); - if (ctx.alias() == null) { + if (expr instanceof Alias) { + if (ctx.alias() != null) { + return new Alias( + ((Alias) expr).getName(), + ((Alias) expr).getDelegated(), + StringUtils.unquoteIdentifier(ctx.alias().getText()) + ); + } else { + return expr; + } + } else if (ctx.alias() == null) { return new Alias(name, expr); } else { String alias = StringUtils.unquoteIdentifier(ctx.alias().getText()); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 71a5c6c487..48e21ce33b 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -9,10 +9,10 @@ import static org.opensearch.sql.ast.dsl.AstDSL.between; import static org.opensearch.sql.ast.dsl.AstDSL.not; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; -import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NESTED; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT_LIKE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.POSITION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; @@ -41,6 +41,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.LikePredicateContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MultiFieldRelevanceFunctionContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NestedFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NoFieldRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NotExpressionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NullLiteralContext; @@ -68,6 +69,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -79,6 +81,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.AggregateFunction; +import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Case; @@ -153,6 +156,33 @@ public UnresolvedExpression visitScalarFunctionCall(ScalarFunctionCallContext ct return buildFunction(ctx.scalarFunctionName().getText(), ctx.functionArgs().functionArg()); } + @Override + public UnresolvedExpression visitNestedFunctionCall(NestedFunctionCallContext ctx) { + List args = new ArrayList(); + + args.add( + new QualifiedName( + ctx.nestedFunction().nestedField().nestedIdent().stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()) + ) + ); + if (ctx.nestedFunction().nestedPath() != null) { + args.add( + new QualifiedName( + ctx.nestedFunction().nestedPath().nestedIdent().stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()) + ) + ); + } + + return new Alias(ctx.nestedFunction().nestedField().getText(), + new Function(NESTED.getName().getFunctionName(), args)); + } + @Override public UnresolvedExpression visitGetFormatFunctionCall(GetFormatFunctionCallContext ctx) { return new Function( diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 5912a76f28..6c1b056ab8 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -635,6 +635,28 @@ public void can_parse_wildcard_query_relevance_function() { + "boost=1.5, case_insensitive=true, rewrite=\"scoring_boolean\")")); } + @Test + public void can_parse_nested_function() { + assertNotNull( + parser.parse("SELECT NESTED(FIELD.DAYOFWEEK) FROM TEST")); + assertNotNull( + parser.parse("SELECT NESTED('FIELD.DAYOFWEEK') FROM TEST")); + assertNotNull( + parser.parse("SELECT SUM(NESTED(FIELD.SUBFIELD)) FROM TEST")); + assertNotNull( + parser.parse("SELECT NESTED(FIELD.DAYOFWEEK, PATH) FROM TEST")); + } + + @Test + public void can_not_parse_nested_function_without_dot() { + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT NESTED(MESSAGE1) FROM TEST")); + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT COUNT(*) FROM TEST GROUP BY NESTED(MESSAGE2)")); + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT NESTED(PATH, INVALID_CONDITION)")); + } + @Test public void can_parse_yearweek_function() { assertNotNull(parser.parse("SELECT yearweek('1987-01-01')")); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 64a7445dc8..8fd34ca674 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -33,21 +33,13 @@ import com.google.common.collect.ImmutableList; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.stream.Stream; -import org.antlr.v4.runtime.tree.ParseTree; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.antlr.SyntaxCheckException; -import org.opensearch.sql.sql.antlr.SQLSyntaxParser; class AstBuilderTest extends AstBuilderTestBase { @@ -697,4 +689,39 @@ public void can_build_string_literal_highlight() { ); } + @Test + public void can_build_nested_select_field() { + assertEquals( + project( + relation("test"), + alias( + "message.info", + function("nested", qualifiedName("message", "info")), + null + ) + ), + buildAST("SELECT" + + " nested(message.info) " + + "FROM test" + ) + ); + } + + @Test + public void can_build_nested_select_field_with_alias() { + assertEquals( + project( + relation("test"), + alias( + "message.info", + function("nested", qualifiedName("message", "info")), + "nest" + ) + ), + buildAST("SELECT" + + " nested(message.info) as nest " + + "FROM test" + ) + ); + } } diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 52dd5e3572..f12dab2f9b 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.caseWhen; import static org.opensearch.sql.ast.dsl.AstDSL.dateLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.doubleLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.equalTo; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.highlight; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; @@ -38,10 +39,13 @@ import java.util.HashMap; import org.antlr.v4.runtime.CommonTokenStream; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.RelevanceFieldList; import org.opensearch.sql.ast.tree.Sort.SortOption; @@ -208,6 +212,40 @@ public void canBuildGetFormatFunctionCall() { ); } + @Test + public void canBuildNestedFunctionWithFieldParam() { + assertEquals( + new Alias("message.info", + function("nested", qualifiedName("message","info"))), + buildExprAst("nested(message.info)") + ); + } + + @Test + public void canBuildNestedFunctionWithFieldAndPathParams() { + assertEquals( + new Alias("message.info", + function("nested", + qualifiedName("message", "info"), + qualifiedName("message"))), + buildExprAst("nested(message.info, message)") + ); + } + + //TODO enable me when condition parameter is supported + @Disabled + public void canBuildNestedFunctionWithConditionParam() { + assertEquals( + new Alias("message", + function("nested", + qualifiedName("message"), + function("=", + qualifiedName("message", "info"), + stringLiteral("a")))), + buildExprAst("nested(message, message.info = 'a')") + ); + } + @Test public void canBuildNestedFunctionCall() { assertEquals(