From 382681e2d6b6c417145022d7df81390bf42f2773 Mon Sep 17 00:00:00 2001 From: chaijunjie0101 <1340011734@qq.com> Date: Sun, 29 Jan 2023 17:18:18 +0800 Subject: [PATCH] HBASE-26967 FilterList with FuzzyRowFilter and SingleColumnValueFilter evaluated with operator MUST_PASS_ONE doesn't work as expected(#4820) Close #4820 Co-authored-by: Duo Zhang Signed-off-by: Duo Zhang --- .../hadoop/hbase/filter/FilterBase.java | 2 +- .../hadoop/hbase/filter/FuzzyRowFilter.java | 43 +++++++++++--- .../filter/TestFuzzyRowFilterEndToEnd.java | 57 ++++++++++++++++++- 3 files changed, 91 insertions(+), 11 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java index 713c4acb2700..c80da159b7ec 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java @@ -71,7 +71,7 @@ public void filterRowCells(List ignored) throws IOException { } /** - * Fitlers that never filter by modifying the returned List of Cells can inherit this + * Filters that never filter by modifying the returned List of Cells can inherit this * implementation that does nothing. {@inheritDoc} */ @Override diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index 2feac5527f7a..fd5a81d694e3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.filter; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -48,18 +49,34 @@ *
  • 1 - means that this byte in provided row key is NOT fixed, i.e. row key's byte at this * position can be different from the one in provided row key
  • * - * Example: Let's assume row key format is userId_actionId_year_month. Length of userId is fixed and - * is 4, length of actionId is 2 and year and month are 4 and 2 bytes long respectively. Let's - * assume that we need to fetch all users that performed certain action (encoded as "99") in Jan of - * any year. Then the pair (row key, fuzzy info) would be the following: row key = "????_99_????_01" - * (one can use any value instead of "?") fuzzy info = - * "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00" I.e. fuzzy info tells the matching - * mask is "????_99_????_01", where at ? can be any value. + * Example: + *

    + * Let's assume row key format is userId_actionId_year_month. Length of userId is fixed and is 4, + * length of actionId is 2 and year and month are 4 and 2 bytes long respectively. + *

    + * Let's assume that we need to fetch all users that performed a certain action (encoded as "99") + * in Jan of any year. Then the pair (row key, fuzzy info) would be the following: + * + *

    + * row key = "????_99_????_01" (one can use any value instead of "?")
    + * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00"
    + * 
    + * + * I.e. fuzzy info tells the matching mask is "????_99_????_01", where at ? can be any value. */ @InterfaceAudience.Public public class FuzzyRowFilter extends FilterBase { private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned(); private List> fuzzyKeysData; + // Used to record whether we want to skip the current row. + // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey + // returns true, we will just skip to next row, instead of calling getNextCellHint to determine + // the actual next row, so we need to implement filterCell and return SEEK_NEXT_USING_HINT to let + // upper layer call getNextCellHint. + // And if we do not implement filterRow, sometimes we will get incorrect result when using + // FuzzyRowFilter together with other filters, please see the description for HBASE-26967 for more + // details. + private boolean filterRow; private boolean done = false; /** @@ -144,6 +161,16 @@ private boolean isPreprocessedMask(byte[] mask) { return true; } + @Override + public void reset() throws IOException { + filterRow = false; + } + + @Override + public boolean filterRow() throws IOException { + return filterRow; + } + @Override public ReturnCode filterCell(final Cell c) { final int startIndex = lastFoundIndex >= 0 ? 
lastFoundIndex : 0; @@ -164,7 +191,7 @@ public ReturnCode filterCell(final Cell c) { } // NOT FOUND -> seek next using hint lastFoundIndex = -1; - + filterRow = true; return ReturnCode.SEEK_NEXT_USING_HINT; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java index 9bc0ed0cb942..872b6c8b541f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.filter; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import java.io.IOException; import java.nio.ByteBuffer; @@ -27,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.CompareOperator; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.HConstants; @@ -353,7 +355,6 @@ private void runScanner(Table hTable, int expectedSize, Filter filter) throws IO assertEquals(expectedSize, found); } - @SuppressWarnings("deprecation") @Test public void testFilterList() throws Exception { String cf = "f"; @@ -396,7 +397,6 @@ public void testFilterList() throws Exception { } - @SuppressWarnings("unchecked") private void runTest(Table hTable, int expectedSize) throws IOException { // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1] byte[] fuzzyKey1 = new byte[10]; @@ -454,4 +454,57 @@ private void runScanner(Table hTable, int expectedSize, Filter filter1, Filter f assertEquals(expectedSize, results.size()); } + + @Test + public void testHBASE26967() throws IOException { + byte[] row1 = Bytes.toBytes("1"); + byte[] row2 = Bytes.toBytes("2"); + String cf1 = "f1"; + String cf2 = "f2"; + String cq1 = 
"col1"; + String cq2 = "col2"; + + Table ht = + TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), new String[] { cf1, cf2 }); + + // Put data + List puts = Lists.newArrayList(); + puts.add(new Put(row1).addColumn(Bytes.toBytes(cf1), Bytes.toBytes(cq1), Bytes.toBytes("a1"))); + puts.add(new Put(row1).addColumn(Bytes.toBytes(cf2), Bytes.toBytes(cq2), Bytes.toBytes("a2"))); + puts.add(new Put(row2).addColumn(Bytes.toBytes(cf1), Bytes.toBytes(cq1), Bytes.toBytes("b1"))); + puts.add(new Put(row2).addColumn(Bytes.toBytes(cf2), Bytes.toBytes(cq2), Bytes.toBytes("b2"))); + ht.put(puts); + + TEST_UTIL.flush(); + + // FuzzyRowFilter + List> data = Lists.newArrayList(); + byte[] fuzzyKey = Bytes.toBytes("1"); + byte[] mask = new byte[] { 0 }; + data.add(new Pair<>(fuzzyKey, mask)); + FuzzyRowFilter fuzzyRowFilter = new FuzzyRowFilter(data); + + // SingleColumnValueFilter + Filter singleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes(cf2), + Bytes.toBytes(cq2), CompareOperator.EQUAL, Bytes.toBytes("x")); + + // FilterList + FilterList filterList = new FilterList(Operator.MUST_PASS_ONE); + filterList.addFilter(Lists.newArrayList(fuzzyRowFilter, singleColumnValueFilter)); + + Scan scan = new Scan(); + scan.setFilter(filterList); + + ResultScanner scanner = ht.getScanner(scan); + Result rs = scanner.next(); + assertEquals(0, Bytes.compareTo(row1, rs.getRow())); + + // The two cells (1,f1,col1,a1) (1,f2,col2,a2) + assertEquals(2, rs.listCells().size()); + + // Only one row who's rowKey=1 + assertNull(scanner.next()); + + TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName())); + } }