Fix explain plan for detection of lookup, use RVC with IN for delete …

…columns, normalize single element IN to equality, correctly detect null value in RVC during parse time
ramkrish86 · Feb 5, 2014 · ff1c1af · ff1c1af
1 parent 47707a7
commit ff1c1af
Show file tree

Hide file tree

Showing 13 changed files with 115 additions and 109 deletions.
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/DeleteCompiler.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/DeleteCompiler.java
@@ -24,6 +24,7 @@
 import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -235,10 +236,11 @@ public ParameterMetaData getParameterMetaData() {
                 public MutationState execute() {
                     // We have a point lookup, so we know we have a simple set of fully qualified
                     // keys for our ranges
-                    List<KeyRange> keys = context.getScanRanges().getRanges().get(0);
-                    Map<ImmutableBytesPtr,Map<PColumn,byte[]>> mutation = Maps.newHashMapWithExpectedSize(keys.size());
-                    for (KeyRange key : keys) {
-                        mutation.put(new ImmutableBytesPtr(key.getLowerRange()), PRow.DELETE_MARKER);
+                    ScanRanges ranges = context.getScanRanges();
+                    Iterator<KeyRange> iterator = ranges.getPointLookupKeyIterator(); 
+                    Map<ImmutableBytesPtr,Map<PColumn,byte[]>> mutation = Maps.newHashMapWithExpectedSize(ranges.getPointLookupCount());
+                    while (iterator.hasNext()) {
+                        mutation.put(new ImmutableBytesPtr(iterator.next().getLowerRange()), PRow.DELETE_MARKER);
                     }
                     return new MutationState(tableRef, mutation, 0, maxSize, connection);
                 }

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/ExpressionCompiler.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/ExpressionCompiler.java
@@ -31,7 +31,6 @@
 
 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-
 import org.apache.phoenix.compile.GroupByCompiler.GroupBy;
 import org.apache.phoenix.exception.SQLExceptionCode;
 import org.apache.phoenix.exception.SQLExceptionInfo;
@@ -742,10 +741,7 @@ public Expression visitLeave(InListParseNode node, List<Expression> l) throws SQ
             return LiteralExpression.newConstant(null, PDataType.BOOLEAN, firstChild.isDeterministic());
         }
 
-        Expression e = InListExpression.create(inChildren, ptr);
-        if (node.isNegate()) {
-            e = new NotExpression(e);
-        }
+        Expression e = InListExpression.create(inChildren, ptr, node.isNegate());
         if (node.isStateless()) {
             if (!e.evaluate(null, ptr) || ptr.getLength() == 0) {
                 return LiteralExpression.newConstant(null, e.getDataType(), e.isDeterministic());

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java
@@ -21,6 +21,7 @@
 
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.hbase.client.Scan;
@@ -34,6 +35,7 @@
 import org.apache.phoenix.util.SchemaUtil;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 
 
@@ -201,6 +203,10 @@ public boolean isPointLookup() {
     public int getPointLookupCount() {
         return isPointLookup ? ranges.get(0).size() : 0;
     }
+
+    public Iterator<KeyRange> getPointLookupKeyIterator() {
+        return isPointLookup ? ranges.get(0).iterator() : Iterators.<KeyRange>emptyIterator();
+    }
 
     public void setScanStartStopRow(Scan scan) {
         if (isEverything()) {

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java
@@ -76,6 +76,7 @@
  * @since 0.1
  */
 public class WhereOptimizer {
+    private static final List<KeyRange> EVERYTHING_RANGES = Collections.<KeyRange>singletonList(KeyRange.EVERYTHING_RANGE);
     private static final List<KeyRange> SALT_PLACEHOLDER = Collections.singletonList(PDataType.CHAR.getKeyRange(QueryConstants.SEPARATOR_BYTE_ARRAY));
 
     private WhereOptimizer() {
@@ -177,8 +178,8 @@ public static Expression pushKeyExpressionsToScan(StatementContext context, Filt
                     cnf.add(Collections.singletonList(KeyRange.EVERYTHING_RANGE));
                 }
             }
-            // We support (a,b) IN ((1,2),(3,4), so in this case we switch to a flattened schema
-            if (fullyQualifiedColumnCount > 1 && slot.getPKSpan() == fullyQualifiedColumnCount && slot.getKeyRanges().size() > 1) {
+            // We support (a,b) IN ((1,2),(3,4)), so in this case we switch to a flattened schema
+            if (fullyQualifiedColumnCount > 1 && slot.getPKSpan() == fullyQualifiedColumnCount && !EVERYTHING_RANGES.equals(slot.getKeyRanges())) {
                 schema = nBuckets == null ? SchemaUtil.VAR_BINARY_SCHEMA : SaltingUtil.VAR_BINARY_SALTED_SCHEMA;
             }
             KeyPart keyPart = slot.getKeyPart();
@@ -269,7 +270,6 @@ public Expression visitLeave(AndExpression node, List<Expression> l) {
      * operators, CASE statements, and string concatenation.
      */
     public static class KeyExpressionVisitor extends TraverseNoExpressionVisitor<KeyExpressionVisitor.KeySlots> {
-        private static final List<KeyRange> EVERYTHING_RANGES = Collections.<KeyRange>singletonList(KeyRange.EVERYTHING_RANGE);
         private static final KeySlots DEGENERATE_KEY_PARTS = new KeySlots() {
             @Override
             public Iterator<KeySlot> iterator() {

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/InListExpression.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/InListExpression.java
@@ -29,19 +29,19 @@
 import java.util.Set;
 
 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.index.util.ImmutableBytesPtr;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.WritableUtils;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import org.apache.hadoop.hbase.index.util.ImmutableBytesPtr;
 import org.apache.phoenix.expression.visitor.ExpressionVisitor;
 import org.apache.phoenix.schema.ConstraintViolationException;
 import org.apache.phoenix.schema.PDataType;
 import org.apache.phoenix.schema.tuple.Tuple;
 import org.apache.phoenix.util.ByteUtil;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
 /*
  * Implementation of a SQL foo IN (a,b,c) expression. Other than the first
  * expression, child expressions must be constants.
@@ -57,7 +57,7 @@ public class InListExpression extends BaseSingleExpression {
     private ImmutableBytesPtr value = new ImmutableBytesPtr();
     private List<Expression> keyExpressions; // client side only
 
-    public static Expression create (List<Expression> children, ImmutableBytesWritable ptr) throws SQLException {
+    public static Expression create (List<Expression> children, ImmutableBytesWritable ptr, boolean isNegate) throws SQLException {
         Expression firstChild = children.get(0);
         PDataType firstChildType = firstChild.getDataType();
 
@@ -101,13 +101,19 @@ public static Expression create (List<Expression> children, ImmutableBytesWritab
         // TODO: if inChildren.isEmpty() then Oracle throws a type mismatch exception. This means
         // that none of the list elements match in type and there's no null element. We'd return
         // false in this case. Should we throw?
-        if (keys.size() == 2) {
-            expression = new ComparisonExpression(CompareOp.EQUAL, keys);
+        // FIXME: We don't optimize RVC equality expression currently like we do with an IN
+        // expression, so don't convert in that case.
+        if (keys.size() == 2 && ! (firstChild instanceof RowValueConstructorExpression) ) {
+            expression = new ComparisonExpression(isNegate ? CompareOp.NOT_EQUAL : CompareOp.EQUAL, keys);
         } else {
             expression = new InListExpression(keys, coercedKeyExpressions);
+            if (isNegate) { 
+                expression = new NotExpression(expression);
+            }
         }
         return expression;
-    }    
+    }
+
     public InListExpression() {
     }
 

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/iterate/ExplainTable.java b/phoenix-core/src/main/java/org/apache/phoenix/iterate/ExplainTable.java
@@ -64,30 +64,27 @@ public ExplainTable(StatementContext context, TableRef table, GroupBy groupBy) {
 
     private boolean explainSkipScan(StringBuilder buf) {
         ScanRanges scanRanges = context.getScanRanges();
-        if (scanRanges.useSkipScanFilter()) {
+        if (scanRanges.isPointLookup()) {
             int keyCount = scanRanges.getPointLookupCount();
-            if (keyCount == 0) {
-                buf.append("SKIP SCAN ");
-                int count = 1;
-                boolean hasRanges = false;
-                for (List<KeyRange> ranges : scanRanges.getRanges()) {
-                    count *= ranges.size();
-                    for (KeyRange range : ranges) {
-                        hasRanges |= !range.isSingleKey();
-                    }
+            buf.append("POINT LOOKUP ON " + keyCount + " KEY" + (keyCount > 1 ? "S " : " "));
+        } else if (scanRanges.useSkipScanFilter()) {
+            buf.append("SKIP SCAN ");
+            int count = 1;
+            boolean hasRanges = false;
+            for (List<KeyRange> ranges : scanRanges.getRanges()) {
+                count *= ranges.size();
+                for (KeyRange range : ranges) {
+                    hasRanges |= !range.isSingleKey();
                 }
-                buf.append("ON ");
-                buf.append(count);
-                buf.append(hasRanges ? " RANGE" : " KEY");
-                buf.append(count > 1 ? "S " : " ");
-            } else {
-                buf.append("POINT LOOKUP ON " + keyCount + " KEY" + (keyCount > 1 ? "S " : " "));
             }
-            return true;
+            buf.append("ON ");
+            buf.append(count);
+            buf.append(hasRanges ? " RANGE" : " KEY");
+            buf.append(count > 1 ? "S " : " ");
         } else {
             buf.append("RANGE SCAN ");
         }
-        return false;
+        return scanRanges.useSkipScanFilter();
     }
 
     protected void explain(String prefix, List<String> planSteps) {

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/LiteralParseNode.java b/phoenix-core/src/main/java/org/apache/phoenix/parse/LiteralParseNode.java
@@ -44,8 +44,10 @@ public class LiteralParseNode extends TerminalParseNode {
     private final PDataType type;
 
     public LiteralParseNode(Object value) {
-        this.value = value;
         this.type = PDataType.fromLiteral(value);
+        // This will make the value null if the value passed through represents null for the given type.
+        // For example, an empty string is treated as a null.
+        this.value = this.type == null ? null : this.type.toObject(value, this.type);
     }
 
     public PDataType getType() {

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/ParseNodeRewriter.java b/phoenix-core/src/main/java/org/apache/phoenix/parse/ParseNodeRewriter.java
@@ -25,13 +25,13 @@
 import java.util.Map;
 
 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import org.apache.phoenix.compile.ColumnResolver;
 import org.apache.phoenix.schema.AmbiguousColumnException;
 import org.apache.phoenix.schema.ColumnNotFoundException;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
 /**
  * 
  * Base class for visitors that rewrite the expression node hierarchy
@@ -339,12 +339,21 @@ public ParseNode createNode(List<ParseNode> children) {
 
     @Override
     public ParseNode visitLeave(final InListParseNode node, List<ParseNode> nodes) throws SQLException {
-        return leaveCompoundNode(node, nodes, new CompoundNodeFactory() {
+        ParseNode normNode = leaveCompoundNode(node, nodes, new CompoundNodeFactory() {
             @Override
             public ParseNode createNode(List<ParseNode> children) {
                 return NODE_FACTORY.inList(children, node.isNegate());
             }
         });
+        if (normNode.getChildren().size() == 2) { // Rewrite as equality or inequality
+            // Rewrite row value constructor in = or != expression, as this is the same as if it was
+            // used in an equality expression which will be more efficient
+            ParseNode lhs = normNode.getChildren().get(0);
+            ParseNode rhs = normNode.getChildren().get(1);
+            normNode = NODE_FACTORY.comparison(node.isNegate() ? CompareOp.NOT_EQUAL : CompareOp.EQUAL, lhs, rhs);
+            normNode = normNode.accept(this);
+        }
+        return normNode;
     }
 
     @Override
@@ -387,14 +396,18 @@ private void rewriteRowValueConstuctorEqualityComparison(ParseNode lhs, ParseNod
             for (int i = 1; i < rhs.getChildren().size(); i++) {
                 rewriteRowValueConstuctorEqualityComparison(null, rhs.getChildren().get(i), andNodes);
             }
-        } else if (lhs == null && rhs == null) { // null == null will end up making the query degenerate
-            andNodes.add(NODE_FACTORY.comparison(CompareOp.EQUAL, null, null).accept(this));
-        } else if (lhs == null) { // AND rhs IS NULL
-            andNodes.add(NODE_FACTORY.isNull(rhs, false).accept(this));
-        } else if (rhs == null) { // AND lhs IS NULL
-            andNodes.add(NODE_FACTORY.isNull(lhs, false).accept(this));
-        } else { // AND lhs = rhs
-            andNodes.add(NODE_FACTORY.comparison(CompareOp.EQUAL, lhs, rhs).accept(this));
+        } else {
+            boolean isLHSNull = lhs == null || (lhs instanceof LiteralParseNode && ((LiteralParseNode)lhs).getValue() == null);
+            boolean isRHSNull = rhs == null || (rhs instanceof LiteralParseNode && ((LiteralParseNode)rhs).getValue() == null);
+            if (isLHSNull && isRHSNull) { // null == null will end up making the query degenerate
+                andNodes.add(NODE_FACTORY.comparison(CompareOp.EQUAL, lhs, rhs).accept(this));
+            } else if (isLHSNull) { // AND rhs IS NULL
+                andNodes.add(NODE_FACTORY.isNull(rhs, false).accept(this));
+            } else if (isRHSNull) { // AND lhs IS NULL
+                andNodes.add(NODE_FACTORY.isNull(lhs, false).accept(this));
+            } else { // AND lhs = rhs
+                andNodes.add(NODE_FACTORY.comparison(CompareOp.EQUAL, lhs, rhs).accept(this));
+            }
         }
     }
 

diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
@@ -1408,59 +1408,40 @@ public MutationState addColumn(AddColumnStatement statement) throws SQLException
 
 
     private String dropColumnMutations(PTable table, List<PColumn> columnsToDrop, List<Mutation> tableMetaData) throws SQLException {
-        String tenantId = connection.getTenantId() == null ? null : connection.getTenantId().getString();
+        String tenantId = connection.getTenantId() == null ? "" : connection.getTenantId().getString();
         String schemaName = table.getSchemaName().getString();
         String tableName = table.getTableName().getString();
         String familyName = null;
+        /*
+         * Generate a fully qualified RVC with an IN clause, since that's what our optimizer can
+         * handle currently. If/when the optimizer handles (A and ((B AND C) OR (D AND E))) we
+         * can factor out the tenant ID, schema name, and table name columns
+         */
         StringBuilder buf = new StringBuilder("DELETE FROM " + TYPE_SCHEMA + ".\"" + TYPE_TABLE + "\" WHERE ");
-        buf.append(TENANT_ID);
-        if (tenantId == null || tenantId.length() == 0) {
-            buf.append(" IS NULL AND ");
-        } else {
-            buf.append(" = ? AND ");
-        }
-        buf.append(TABLE_SCHEM_NAME);
-        if (schemaName == null || schemaName.length() == 0) {
-            buf.append(" IS NULL AND ");
-        } else {
-            buf.append(" = ? AND ");
+        buf.append("(" + 
+                TENANT_ID + "," + TABLE_SCHEM_NAME + "," + TABLE_NAME_NAME + "," + 
+                COLUMN_NAME + ", " + TABLE_CAT_NAME + ") IN (");
+        for(PColumn columnToDrop : columnsToDrop) {
+            buf.append("('" + tenantId + "'");
+            buf.append(",'" + schemaName + "'");
+            buf.append(",'" + tableName + "'");
+            buf.append(",'" + columnToDrop.getName().getString() + "'");
+            buf.append(",'" + (columnToDrop.getFamilyName() == null ? "" : columnToDrop.getFamilyName().getString()) + "'),");
         }
-        buf.append (TABLE_NAME_NAME + " = ? AND " + COLUMN_NAME + " = ? AND " + TABLE_CAT_NAME);
-        buf.append(" = ?");
+        buf.setCharAt(buf.length()-1, ')');
 
-        // TODO: when DeleteCompiler supports running an fully qualified IN query on the client-side,
-        // we can use a single IN query here instead of executing a different query per column being dropped.
-        PreparedStatement colDelete = connection.prepareStatement(buf.toString());
-        try {
-            for(PColumn columnToDrop : columnsToDrop) {
-                int i = 1;
-                if (tenantId != null && tenantId.length() > 0) {
-                    colDelete.setString(i++, tenantId);
-                }
-                if (schemaName != null & schemaName.length() > 0) {
-                    colDelete.setString(i++, schemaName);    
-                }
-                colDelete.setString(i++, tableName);
-                colDelete.setString(i++, columnToDrop.getName().getString());
-                colDelete.setString(i++, columnToDrop.getFamilyName() == null ? null : columnToDrop.getFamilyName().getString());
-                colDelete.execute();
-            }
-        } finally {
-            if(colDelete != null) {
-                colDelete.close();
-            }
-        }
+        connection.createStatement().execute(buf.toString());
 
-       Collections.sort(columnsToDrop,new Comparator<PColumn> () {
-           @Override
+        Collections.sort(columnsToDrop,new Comparator<PColumn> () {
+            @Override
             public int compare(PColumn left, PColumn right) {
                return Ints.compare(left.getPosition(), right.getPosition());
             }
         });
 
         int columnsToDropIndex = 0;
         PreparedStatement colUpdate = connection.prepareStatement(UPDATE_COLUMN_POSITION);
-        colUpdate.setString(1, connection.getTenantId() == null ? null : connection.getTenantId().getString());
+        colUpdate.setString(1, tenantId);
         colUpdate.setString(2, schemaName);
         colUpdate.setString(3, tableName);
         for (int i = columnsToDrop.get(columnsToDropIndex).getPosition() + 1; i < table.getColumns().size(); i++) {