From 8a86340e2d626a055e1635fe5b0fce4979c38180 Mon Sep 17 00:00:00 2001 From: Bo Cui Date: Thu, 8 Oct 2020 13:31:15 +0800 Subject: [PATCH 1/3] HBASE-24960 reduce invalid subprocedure task --- ...egionServerFlushTableProcedureManager.java | 6 + .../snapshot/RegionServerSnapshotManager.java | 6 + ...egionServerFlushTableProcedureManager.java | 143 ++++++++++++++++++ .../TestRegionServerSnapshotManager.java | 130 ++++++++++++++++ 4 files changed, 285 insertions(+) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/RegionServerFlushTableProcedureManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/RegionServerFlushTableProcedureManager.java index 1e95d15881fb..6871a723dd77 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/RegionServerFlushTableProcedureManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/RegionServerFlushTableProcedureManager.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; import org.apache.zookeeper.KeeperException; import org.apache.yetus.audience.InterfaceAudience; @@ -150,6 +151,11 @@ public Subprocedure buildSubprocedure(String table, String family) { throw new IllegalStateException("Failed to figure out if there is region to flush.", e1); } + if (CollectionUtils.isEmpty(involvedRegions)) { + LOG.info("no region of {} is online on the {}.", table, this.rss.getServerName()); + return null; + } + // We need to run the subprocedure even if we have no relevant regions. The coordinator // expects participation in the procedure and without sending message the master procedure // will hang and fail. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java index a01d118718d0..806dc7857368 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; import org.apache.hadoop.hbase.client.RegionReplicaUtil; @@ -170,6 +171,11 @@ public Subprocedure buildSubprocedure(SnapshotDescription snapshot) { + "something has gone awry with the online regions.", e1); } + if (CollectionUtils.isEmpty(involvedRegions)) { + LOG.info("no region of {} is online on the {}.", snapshot.getTable(), this.rss.getServerName()); + return null; + } + // We need to run the subprocedure even if we have no relevant regions. The coordinator // expects participation in the procedure and without sending message the snapshot attempt // will hang and fail. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java new file mode 100644 index 000000000000..3051a54bf352 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure.flush; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.procedure.ProcedureMember; +import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost; +import org.apache.hadoop.hbase.procedure.Subprocedure; +import org.apache.hadoop.hbase.procedure.SubprocedureFactory; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.List; +import static org.junit.Assert.*; + +@Category({MediumTests.class, RegionServerTests.class }) +public class TestRegionServerFlushTableProcedureManager { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionServerFlushTableProcedureManager.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRegionServerFlushTableProcedureManager.class); + private static HBaseTestingUtility TEST_UTIL; + private static Connection connection; + private static Admin admin; + private static Boolean hasError = false; + @BeforeClass + public static void setupBeforeClass() throws Exception { + TEST_UTIL = new HBaseTestingUtility(); + TEST_UTIL.startMiniCluster(2); + TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(); + connection = TEST_UTIL.getConnection(); + admin = connection.getAdmin(); + } + + @AfterClass + public static void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + private int createAndLoadTable(TableName tableName) throws IOException { + TEST_UTIL.createTable(tableName,new byte[][]{ Bytes.toBytes("fam")}); + TEST_UTIL.loadTable(connection.getTable(tableName), Bytes.toBytes("fam")); + return TEST_UTIL.countRows(tableName); + } + + private Object setAndGetField(Object object, String field, Object value) + throws IllegalAccessException, NoSuchFieldException { + Field f = null; + try { + f = object.getClass().getField(field); + } catch (NoSuchFieldException e) { + f = object.getClass().getSuperclass().getField(field); + } + f.setAccessible(true); + if (value != null) { + f.set(object, value); + } + return f.get(object); + } + + private void setRSSnapshotProcManagerMock(HRegionServer regionServer, boolean hasRegion) + throws NoSuchFieldException, IllegalAccessException { + RegionServerProcedureManagerHost + rspmHost = (RegionServerProcedureManagerHost) setAndGetField(regionServer, "rspmHost", null); + RegionServerFlushTableProcedureManager rsManager = rspmHost.getProcedureManagers().stream().filter(v -> v instanceof RegionServerFlushTableProcedureManager) + .map(v -> (RegionServerFlushTableProcedureManager)v).findAny().get(); + ProcedureMember procedureMember = (ProcedureMember) setAndGetField(rsManager, "member", null); + setAndGetField(procedureMember, "builder", new SubprocedureFactory() { + @Override + public Subprocedure buildSubprocedure(String procName, byte[] procArgs) { + String family = null; + // Currently we do not put other data except family, so it is ok to + // judge by length that if family was specified + if (procArgs.length > 0) { + try { + HBaseProtos.NameStringPair nsp = HBaseProtos.NameStringPair.parseFrom(procArgs); + family = nsp.getValue(); + } catch (Exception e) { + LOG.error("fail to get family by parsing from data", e); + hasError |= true; + } + } + Subprocedure subprocedure = rsManager.buildSubprocedure(procName, family); + hasError |= (hasRegion && subprocedure == null || !hasRegion && subprocedure != null); + return subprocedure; + } + }); + } + + @Test + public void testInvalidSubProcedure() + throws IOException, NoSuchFieldException, IllegalAccessException, InterruptedException { + TableName tableName = TableName.valueOf("test_table"); + int count = createAndLoadTable(tableName); + List regionLocationList = connection.getRegionLocator(tableName).getAllRegionLocations(); + ServerName serverName = regionLocationList.stream().map(v -> v.getServerName()).findAny().get(); + String regionName = regionLocationList.get(0).getRegion().getEncodedName(); + HRegion region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getRegion(regionName); + HRegionServer regionServer0 = TEST_UTIL.getHBaseCluster().getRegionServer(0); + HRegionServer regionServer1 = TEST_UTIL.getHBaseCluster().getRegionServer(1); + setRSSnapshotProcManagerMock(regionServer0, regionServer0.getServerName().equals(serverName)); + setRSSnapshotProcManagerMock(regionServer1, regionServer1.getServerName().equals(serverName)); + assertNotEquals(0, region.getMemStoreDataSize()); + admin.flush(tableName); + assertFalse(hasError); + assertEquals(0, region.getMemStoreDataSize()); + assertEquals(count, TEST_UTIL.countRows(tableName)); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java new file mode 100644 index 000000000000..a9deee345487 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.snapshot; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.procedure.ProcedureMember; +import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost; +import org.apache.hadoop.hbase.procedure.Subprocedure; +import org.apache.hadoop.hbase.procedure.SubprocedureFactory; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.lang.reflect.Field; +import static org.junit.Assert.*; + +@Category({MediumTests.class, RegionServerTests.class }) +public class TestRegionServerSnapshotManager { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionServerSnapshotManager.class); + private static HBaseTestingUtility TEST_UTIL; + private static Connection connection; + private static Admin admin; + private static Boolean hasError = false; + @BeforeClass + public static void setupBeforeClass() throws Exception { + TEST_UTIL = new HBaseTestingUtility(); + TEST_UTIL.startMiniCluster(2); + TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(); + connection = TEST_UTIL.getConnection(); + admin = connection.getAdmin(); + } + + @AfterClass + public static void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + private int createAndLoadTable(TableName tableName) throws IOException { + TEST_UTIL.createTable(tableName,new byte[][]{Bytes.toBytes("fam")}); + TEST_UTIL.loadTable(connection.getTable(tableName), Bytes.toBytes("fam")); + return TEST_UTIL.countRows(tableName); + } + + private Object setAndGetField(Object object, String field, Object value) + throws IllegalAccessException, NoSuchFieldException { + Field f = null; + try { + f = object.getClass().getField(field); + } catch (NoSuchFieldException e) { + f = object.getClass().getSuperclass().getField(field); + } + f.setAccessible(true); + if (value != null) { + f.set(object, value); + } + return f.get(object); + } + + private void setRSSnapshotProcManagerMock(HRegionServer regionServer, boolean hasRegion) + throws NoSuchFieldException, IllegalAccessException { + RegionServerProcedureManagerHost rspmHost = (RegionServerProcedureManagerHost) setAndGetField(regionServer, "rspmHost", null); + RegionServerSnapshotManager rsManager = rspmHost.getProcedureManagers().stream().filter(v -> v instanceof RegionServerSnapshotManager) + .map(v -> (RegionServerSnapshotManager)v).findAny().get(); + ProcedureMember procedureMember = (ProcedureMember) setAndGetField(rsManager, "member", null); + setAndGetField(procedureMember, "builder", new SubprocedureFactory() { + @Override + public Subprocedure buildSubprocedure(String procName, byte[] procArgs) { + SnapshotDescription snapshot = null; + try { + snapshot = SnapshotDescription.parseFrom(procArgs); + } catch (InvalidProtocolBufferException e) { + throw new IllegalArgumentException("Could not read snapshot information from request."); + } + Subprocedure subprocedure = rsManager.buildSubprocedure(snapshot); + hasError |= (hasRegion && subprocedure == null || !hasRegion && subprocedure != null); + return subprocedure; + } + }); + } + + @Test + public void testInvalidSubProcedure() + throws IOException, NoSuchFieldException, IllegalAccessException { + TableName tableName = TableName.valueOf("test_table"); + int count = createAndLoadTable(tableName); + ServerName serverName = connection.getRegionLocator(tableName).getAllRegionLocations().stream() + .map(v -> v.getServerName()).findAny().get(); + HRegionServer regionServer0 = TEST_UTIL.getHBaseCluster().getRegionServer(0); + HRegionServer regionServer1 = TEST_UTIL.getHBaseCluster().getRegionServer(1); + setRSSnapshotProcManagerMock(regionServer0, regionServer0.getServerName().equals(serverName)); + setRSSnapshotProcManagerMock(regionServer1, regionServer1.getServerName().equals(serverName)); + admin.snapshot("ss", tableName); + assertFalse(hasError); + admin.disableTable(tableName); + admin.deleteTable(tableName); + admin.cloneSnapshot("ss", tableName); + assertEquals(count, TEST_UTIL.countRows(tableName)); + } + +} From a1ddf03c0908d9a6c51aa6c87ccf41250a4d698d Mon Sep 17 00:00:00 2001 From: Bo Cui Date: Wed, 25 Nov 2020 20:26:03 +0800 Subject: [PATCH 2/3] HBASE-24960 reduce invalid subprocedure task --- .../flush/TestRegionServerFlushTableProcedureManager.java | 4 ++-- .../snapshot/TestRegionServerSnapshotManager.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java index 3051a54bf352..cd84a6c3c27d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure/flush/TestRegionServerFlushTableProcedureManager.java @@ -81,9 +81,9 @@ private Object setAndGetField(Object object, String field, Object value) throws IllegalAccessException, NoSuchFieldException { Field f = null; try { - f = object.getClass().getField(field); + f = object.getClass().getDeclaredField(field); } catch (NoSuchFieldException e) { - f = object.getClass().getSuperclass().getField(field); + f = object.getClass().getSuperclass().getDeclaredField(field); } f.setAccessible(true); if (value != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java index a9deee345487..323452c65f49 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/snapshot/TestRegionServerSnapshotManager.java @@ -75,9 +75,9 @@ private Object setAndGetField(Object object, String field, Object value) throws IllegalAccessException, NoSuchFieldException { Field f = null; try { - f = object.getClass().getField(field); + f = object.getClass().getDeclaredField(field); } catch (NoSuchFieldException e) { - f = object.getClass().getSuperclass().getField(field); + f = object.getClass().getSuperclass().getDeclaredField(field); } f.setAccessible(true); if (value != null) { From acae4334a7edbf6ea5db39985a0af4d22b2887c3 Mon Sep 17 00:00:00 2001 From: Bo Date: Thu, 7 Jan 2021 19:04:11 +0800 Subject: [PATCH 3/3] HBASE-24960 reduce invalid subproc task --- .../master/snapshot/EnabledTableSnapshotHandler.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java index 81aa2df323d8..a31eb38f78a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java @@ -21,6 +21,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -70,8 +71,11 @@ public EnabledTableSnapshotHandler prepare() throws Exception { */ @Override protected void snapshotRegions(List> regions) throws IOException { - Set regionServers = new HashSet<>(regions.size()); - for (Pair region : regions) { + List> defaultRegions = regions.stream() + .filter(region -> RegionReplicaUtil.isDefaultReplica(region.getFirst())) + .collect(Collectors.toList()); + Set regionServers = new HashSet<>(defaultRegions.size()); + for (Pair region : defaultRegions) { if (region != null && region.getFirst() != null && region.getSecond() != null) { RegionInfo hri = region.getFirst(); if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue; @@ -96,7 +100,7 @@ protected void snapshotRegions(List> regions) throw LOG.info("Done waiting - online snapshot for " + this.snapshot.getName()); // Take the offline regions as disabled - for (Pair region : regions) { + for (Pair region : defaultRegions) { RegionInfo regionInfo = region.getFirst(); if (regionInfo.isOffline() && (regionInfo.isSplit() || regionInfo.isSplitParent()) && RegionReplicaUtil.isDefaultReplica(regionInfo)) {