From 4a6ba116bf3a8484b8238968c02af58c2bf9496b Mon Sep 17 00:00:00 2001 From: huiruan Date: Fri, 18 Mar 2022 16:20:20 +0800 Subject: [PATCH] HBASE-26842 TestSnapshotProcedure fails in branch-2 (#4225) Closes #4225 Signed-off-by: Duo Zhang --- .../org/apache/hadoop/hbase/client/Admin.java | 6 +- .../hadoop/hbase/client/HBaseAdmin.java | 100 ++---- .../hbase/client/TestSnapshotFromAdmin.java | 199 ------------ .../procedure/TestSnapshotProcedure.java | 304 +----------------- .../TestSnapshotProcedureBasicSnapshot.java | 86 +++++ .../TestSnapshotProcedureConcurrently.java | 138 ++++++++ .../TestSnapshotProcedureMasterRestarts.java | 89 +++++ .../procedure/TestSnapshotProcedureRIT.java | 62 ++++ .../TestSnapshotProcedureRSCrashes.java | 81 +++++ ...estSnapshotProcedureSnapshotCorrupted.java | 78 +++++ .../hbase/snapshot/SnapshotTestingUtils.java | 29 -- .../snapshot/TestFlushSnapshotFromClient.java | 9 +- 12 files changed, 581 insertions(+), 600 deletions(-) delete mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureBasicSnapshot.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureConcurrently.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureMasterRestarts.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRIT.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRSCrashes.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureSnapshotCorrupted.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index 0be8173dcdd5..f8aa9349b781 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -2272,8 +2272,10 @@ default void snapshot(String snapshotName, TableName tableName, * @throws SnapshotCreationException if snapshot failed to be taken * @throws IllegalArgumentException if the snapshot request is formatted incorrectly */ - void snapshot(SnapshotDescription snapshot) - throws IOException, SnapshotCreationException, IllegalArgumentException; + default void snapshot(SnapshotDescription snapshot) + throws IOException, SnapshotCreationException, IllegalArgumentException { + get(snapshotAsync(snapshot), getSyncWaitTimeout(), TimeUnit.MILLISECONDS); + } /** * Take a snapshot without waiting for the server to complete that snapshot (asynchronous) Only a diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index b87874e1994a..30fa3e3b066d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -2512,87 +2512,51 @@ public CompactionState getCompactionStateForRegion(final byte[] regionName) return null; } - @Override - public void snapshot(SnapshotDescription snapshotDesc) - throws IOException, SnapshotCreationException, IllegalArgumentException { - // actually take the snapshot - SnapshotProtos.SnapshotDescription snapshot = - ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotDesc); - SnapshotResponse response = asyncSnapshot(snapshot); - final IsSnapshotDoneRequest request = - IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot).build(); - IsSnapshotDoneResponse done = null; - long start = EnvironmentEdgeManager.currentTime(); - long max = response.getExpectedTimeout(); - long maxPauseTime = max / this.numRetries; - int tries = 0; - LOG.debug("Waiting a max of " + max + " ms for snapshot '" + - ClientSnapshotDescriptionUtils.toString(snapshot) + "'' to complete. (max " + - maxPauseTime + " ms per retry)"); - while (tries == 0 - || ((EnvironmentEdgeManager.currentTime() - start) < max && !done.getDone())) { - try { - // sleep a backoff <= pauseTime amount - long sleep = getPauseTime(tries++); - sleep = sleep > maxPauseTime ? maxPauseTime : sleep; - LOG.debug("(#" + tries + ") Sleeping: " + sleep + - "ms while waiting for snapshot completion."); - Thread.sleep(sleep); - } catch (InterruptedException e) { - throw (InterruptedIOException)new InterruptedIOException("Interrupted").initCause(e); - } - LOG.debug("Getting current status of snapshot from master..."); - done = executeCallable(new MasterCallable(getConnection(), - getRpcControllerFactory()) { - @Override - protected IsSnapshotDoneResponse rpcCall() throws Exception { - return master.isSnapshotDone(getRpcController(), request); - } - }); - } - if (!done.getDone()) { - throw new SnapshotCreationException("Snapshot '" + snapshot.getName() - + "' wasn't completed in expectedTime:" + max + " ms", snapshotDesc); - } - } - @Override public Future snapshotAsync(SnapshotDescription snapshotDesc) throws IOException, SnapshotCreationException { - asyncSnapshot(ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotDesc)); - return new ProcedureFuture(this, null) { + SnapshotResponse resp = asyncSnapshot(ProtobufUtil + .createHBaseProtosSnapshotDesc(snapshotDesc)); + // This is for keeping compatibility with old implementation. + // If there is a procId field in the response, then the snapshot will be operated with a + // SnapshotProcedure, otherwise the snapshot will be coordinated by zk. + if (resp.hasProcId()) { + return new SnapshotFuture(this, snapshotDesc, resp.getProcId()); + } else { + return new SnapshotFuture(this, snapshotDesc, null); + } + } - @Override - protected Void waitOperationResult(long deadlineTs) throws IOException, TimeoutException { - waitForState(deadlineTs, new WaitForStateCallable() { + private static final class SnapshotFuture extends TableFuture { + private final SnapshotDescription snapshotDesc; - @Override - public void throwInterruptedException() throws InterruptedIOException { - throw new InterruptedIOException( - "Interrupted while waiting for taking snapshot" + snapshotDesc); - } + public SnapshotFuture(HBaseAdmin admin, SnapshotDescription snapshotDesc, Long procId) { + super(admin, snapshotDesc.getTableName(), procId); + this.snapshotDesc = snapshotDesc; + } - @Override - public void throwTimeoutException(long elapsedTime) throws TimeoutException { - throw new TimeoutException("Snapshot '" + snapshotDesc.getName() + - "' wasn't completed in expectedTime:" + elapsedTime + " ms"); - } + @Override + public String getOperationType() { + return "SNAPSHOT"; + } - @Override - public boolean checkState(int tries) throws IOException { - return isSnapshotFinished(snapshotDesc); - } - }); - return null; - } - }; + @Override + protected Void waitOperationResult(long deadlineTs) throws IOException, TimeoutException { + waitForState(deadlineTs, new TableWaitForStateCallable() { + @Override + public boolean checkState(int tries) throws IOException { + return getAdmin().isSnapshotFinished(snapshotDesc); + } + }); + return null; + } } private SnapshotResponse asyncSnapshot(SnapshotProtos.SnapshotDescription snapshot) throws IOException { ClientSnapshotDescriptionUtils.assertSnapshotRequestIsValid(snapshot); final SnapshotRequest request = SnapshotRequest.newBuilder().setSnapshot(snapshot) - .build(); + .setNonceGroup(ng.newNonce()).setNonce(ng.newNonce()).build(); // run the snapshot on the master return executeCallable(new MasterCallable(getConnection(), getRpcControllerFactory()) { diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java deleted file mode 100644 index ec2d29d1fa4c..000000000000 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java +++ /dev/null @@ -1,199 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.client; - -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.IOException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.ipc.HBaseRpcController; -import org.apache.hadoop.hbase.ipc.RpcControllerFactory; -import org.apache.hadoop.hbase.testclassification.ClientTests; -import org.apache.hadoop.hbase.testclassification.SmallTests; -import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TestName; -import org.mockito.Mockito; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; - -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SnapshotResponse; - -/** - * Test snapshot logic from the client - */ -@Category({SmallTests.class, ClientTests.class}) -public class TestSnapshotFromAdmin { - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestSnapshotFromAdmin.class); - - private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromAdmin.class); - - @Rule - public TestName name = new TestName(); - - /** - * Test that the logic for doing 'correct' back-off based on exponential increase and the max-time - * passed from the server ensures the correct overall waiting for the snapshot to finish. - * @throws Exception - */ - @Test - public void testBackoffLogic() throws Exception { - final int pauseTime = 100; - final int maxWaitTime = - HConstants.RETRY_BACKOFF[HConstants.RETRY_BACKOFF.length - 1] * pauseTime; - final int numRetries = HConstants.RETRY_BACKOFF.length; - // calculate the wait time, if we just do straight backoff (ignoring the expected time from - // master) - long ignoreExpectedTime = 0; - for (int i = 0; i < HConstants.RETRY_BACKOFF.length; i++) { - ignoreExpectedTime += HConstants.RETRY_BACKOFF[i] * pauseTime; - } - // the correct wait time, capping at the maxTime/tries + fudge room - final long time = pauseTime * 3L + ((maxWaitTime / numRetries) * 3) + 300L; - assertTrue("Capped snapshot wait time isn't less that the uncapped backoff time " - + "- further testing won't prove anything.", time < ignoreExpectedTime); - - // setup the mocks - ConnectionImplementation mockConnection = Mockito - .mock(ConnectionImplementation.class); - Configuration conf = HBaseConfiguration.create(); - // setup the conf to match the expected properties - conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, numRetries); - conf.setLong("hbase.client.pause", pauseTime); - - // mock the master admin to our mock - MasterKeepAliveConnection mockMaster = Mockito.mock(MasterKeepAliveConnection.class); - Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); - Mockito.when(mockConnection.getMaster()).thenReturn(mockMaster); - // we need a real retrying caller - RpcRetryingCallerFactory callerFactory = new RpcRetryingCallerFactory(conf); - RpcControllerFactory controllerFactory = Mockito.mock(RpcControllerFactory.class); - Mockito.when(controllerFactory.newController()).thenReturn( - Mockito.mock(HBaseRpcController.class)); - Mockito.when(mockConnection.getRpcRetryingCallerFactory()).thenReturn(callerFactory); - Mockito.when(mockConnection.getRpcControllerFactory()).thenReturn(controllerFactory); - // set the max wait time for the snapshot to complete - SnapshotResponse response = SnapshotResponse.newBuilder() - .setExpectedTimeout(maxWaitTime) - .build(); - Mockito - .when( - mockMaster.snapshot((RpcController) Mockito.any(), - Mockito.any())).thenReturn(response); - // setup the response - IsSnapshotDoneResponse.Builder builder = IsSnapshotDoneResponse.newBuilder(); - builder.setDone(false); - // first five times, we return false, last we get success - Mockito.when( - mockMaster.isSnapshotDone((RpcController) Mockito.any(), - Mockito.any())).thenReturn(builder.build(), builder.build(), - builder.build(), builder.build(), builder.build(), builder.setDone(true).build()); - - // setup the admin and run the test - Admin admin = new HBaseAdmin(mockConnection); - String snapshot = "snapshot"; - final TableName table = TableName.valueOf(name.getMethodName()); - // get start time - long start = EnvironmentEdgeManager.currentTime(); - admin.snapshot(snapshot, table); - long finish = EnvironmentEdgeManager.currentTime(); - long elapsed = (finish - start); - assertTrue("Elapsed time:" + elapsed + " is more than expected max:" + time, elapsed <= time); - admin.close(); - } - - /** - * Make sure that we validate the snapshot name and the table name before we pass anything across - * the wire - * @throws Exception on failure - */ - @Test - public void testValidateSnapshotName() throws Exception { - ConnectionImplementation mockConnection = Mockito - .mock(ConnectionImplementation.class); - Configuration conf = HBaseConfiguration.create(); - Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); - // we need a real retrying caller - RpcRetryingCallerFactory callerFactory = new RpcRetryingCallerFactory(conf); - RpcControllerFactory controllerFactory = Mockito.mock(RpcControllerFactory.class); - Mockito.when(controllerFactory.newController()).thenReturn( - Mockito.mock(HBaseRpcController.class)); - Mockito.when(mockConnection.getRpcRetryingCallerFactory()).thenReturn(callerFactory); - Mockito.when(mockConnection.getRpcControllerFactory()).thenReturn(controllerFactory); - Admin admin = new HBaseAdmin(mockConnection); - // check that invalid snapshot names fail - failSnapshotStart(admin, new SnapshotDescription(HConstants.SNAPSHOT_DIR_NAME)); - failSnapshotStart(admin, new SnapshotDescription("-snapshot")); - failSnapshotStart(admin, new SnapshotDescription("snapshot fails")); - failSnapshotStart(admin, new SnapshotDescription("snap$hot")); - failSnapshotStart(admin, new SnapshotDescription("snap:hot")); - // check the table name also get verified - failSnapshotDescriptorCreation("snapshot", ".table"); - failSnapshotDescriptorCreation("snapshot", "-table"); - failSnapshotDescriptorCreation("snapshot", "table fails"); - failSnapshotDescriptorCreation("snapshot", "tab%le"); - - // mock the master connection - MasterKeepAliveConnection master = Mockito.mock(MasterKeepAliveConnection.class); - Mockito.when(mockConnection.getMaster()).thenReturn(master); - SnapshotResponse response = SnapshotResponse.newBuilder().setExpectedTimeout(0).build(); - Mockito.when( - master.snapshot((RpcController) Mockito.any(), Mockito.any())) - .thenReturn(response); - IsSnapshotDoneResponse doneResponse = IsSnapshotDoneResponse.newBuilder().setDone(true).build(); - Mockito.when( - master.isSnapshotDone((RpcController) Mockito.any(), - Mockito.any())).thenReturn(doneResponse); - - // make sure that we can use valid names - admin.snapshot(new SnapshotDescription("snapshot", TableName.valueOf(name.getMethodName()))); - } - - private void failSnapshotStart(Admin admin, SnapshotDescription snapshot) - throws IOException { - try { - admin.snapshot(snapshot); - fail("Snapshot should not have succeed with name:" + snapshot.getName()); - } catch (IllegalArgumentException e) { - LOG.debug("Correctly failed to start snapshot:" + e.getMessage()); - } - } - - private void failSnapshotDescriptorCreation(final String snapshotName, final String tableName) { - try { - new SnapshotDescription(snapshotName, tableName); - fail("SnapshotDescription should not have succeed with name:" + snapshotName); - } catch (IllegalArgumentException e) { - LOG.debug("Correctly failed to create SnapshotDescription:" + e.getMessage()); - } - } -} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java index 7d63f928ce80..3a3ccf709c40 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java @@ -18,50 +18,30 @@ package org.apache.hadoop.hbase.master.procedure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.SnapshotDescription; import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure; -import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.procedure2.Procedure; -import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; -import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; -import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.RegionSplitter; -import org.apache.hadoop.hdfs.DistributedFileSystem; import org.junit.After; import org.junit.Before; import org.junit.ClassRule; -import org.junit.Test; import org.junit.experimental.categories.Category; import org.mockito.Mockito; import org.mockito.internal.stubbing.answers.AnswersWithDelay; @@ -69,27 +49,25 @@ import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState; -import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; @Category({ MasterTests.class, MediumTests.class }) public class TestSnapshotProcedure { - private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotProcedure.class); + protected static final Logger LOG = LoggerFactory.getLogger(TestSnapshotProcedure.class); @ClassRule public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestSnapshotProcedure.class); - private static HBaseTestingUtility TEST_UTIL; - private HMaster master; - private TableName TABLE_NAME; - private byte[] CF; - private String SNAPSHOT_NAME; - private SnapshotDescription snapshot; - private SnapshotProtos.SnapshotDescription snapshotProto; + protected static HBaseTestingUtility TEST_UTIL; + protected HMaster master; + protected TableName TABLE_NAME; + protected byte[] CF; + protected String SNAPSHOT_NAME; + protected SnapshotDescription snapshot; + protected SnapshotProtos.SnapshotDescription snapshotProto; @Before public void setup() throws Exception { @@ -117,96 +95,6 @@ public void setup() throws Exception { TEST_UTIL.loadTable(table, CF, false); } - @Test - public void testSimpleSnapshotTable() throws Exception { - TEST_UTIL.getAdmin().snapshot(snapshot); - SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - } - - @Test - public void testMasterRestart() throws Exception { - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - MasterProcedureEnv env = procExec.getEnvironment(); - SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); - SnapshotProcedure spySp = getDelayedOnSpecificStateSnapshotProcedure(sp, - procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); - - long procId = procExec.submitProcedure(spySp); - - TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures() - .stream().map(Procedure::getProcId).collect(Collectors.toList()).contains(procId)); - TEST_UTIL.getHBaseCluster().killMaster(master.getServerName()); - TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000); - TEST_UTIL.getHBaseCluster().startMaster(); - TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(); - - master = TEST_UTIL.getHBaseCluster().getMaster(); - assertTrue(master.getSnapshotManager().isTakingAnySnapshot()); - assertTrue(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME)); - - List unfinishedProcedures = master - .getMasterProcedureExecutor().getProcedures().stream() - .filter(p -> p instanceof SnapshotProcedure) - .filter(p -> !p.isFinished()).map(p -> (SnapshotProcedure) p) - .collect(Collectors.toList()); - assertEquals(unfinishedProcedures.size(), 1); - long newProcId = unfinishedProcedures.get(0).getProcId(); - assertEquals(procId, newProcId); - - ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), newProcId); - assertFalse(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME)); - - List snapshots - = master.getSnapshotManager().getCompletedSnapshots(); - assertEquals(1, snapshots.size()); - assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName()); - assertEquals(TABLE_NAME, TableName.valueOf(snapshots.get(0).getTable())); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - } - - @Test - public void testRegionServerCrashWhileTakingSnapshot() throws Exception { - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - MasterProcedureEnv env = procExec.getEnvironment(); - SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); - long procId = procExec.submitProcedure(sp); - - SnapshotRegionProcedure snp = waitProcedureRunnableAndGetFirst( - SnapshotRegionProcedure.class, 60000); - ServerName targetServer = env.getAssignmentManager().getRegionStates() - .getRegionStateNode(snp.getRegion()).getRegionLocation(); - TEST_UTIL.getHBaseCluster().killRegionServer(targetServer); - - TEST_UTIL.waitFor(60000, () -> snp.inRetrying()); - ProcedureTestingUtility.waitProcedure(procExec, procId); - - SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - } - - @Test - public void testRegionServerCrashWhileVerifyingSnapshot() throws Exception { - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - MasterProcedureEnv env = procExec.getEnvironment(); - SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); - long procId = procExec.submitProcedure(sp); - - SnapshotVerifyProcedure svp = waitProcedureRunnableAndGetFirst( - SnapshotVerifyProcedure.class, 60000); - TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null); - ServerName previousTargetServer = svp.getServerName(); - - HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(previousTargetServer); - TEST_UTIL.getHBaseCluster().killRegionServer(rs.getServerName()); - TEST_UTIL.waitFor(60000, () -> svp.getServerName() != null - && !svp.getServerName().equals(previousTargetServer)); - ProcedureTestingUtility.waitProcedure(procExec, procId); - - SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - } - public > T waitProcedureRunnableAndGetFirst( Class clazz, long timeout) throws IOException { TEST_UTIL.waitFor(timeout, () -> master.getProcedures().stream() @@ -217,181 +105,7 @@ public > T waitProcedureRunnableAndGetFi return procOpt.get(); } - @Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class) - public void testClientTakingTwoSnapshotOnSameTable() throws Exception { - Thread first = new Thread("first-client") { - @Override - public void run() { - try { - TEST_UTIL.getAdmin().snapshot(snapshot); - } catch (IOException e) { - LOG.error("first client failed taking snapshot", e); - fail("first client failed taking snapshot"); - } - } - }; - first.start(); - Thread.sleep(1000); - // we don't allow different snapshot with same name - SnapshotDescription snapshotWithSameName = - new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, SnapshotType.SKIPFLUSH); - TEST_UTIL.getAdmin().snapshot(snapshotWithSameName); - } - - @Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class) - public void testClientTakeSameSnapshotTwice() throws IOException, InterruptedException { - Thread first = new Thread("first-client") { - @Override - public void run() { - try { - TEST_UTIL.getAdmin().snapshot(snapshot); - } catch (IOException e) { - LOG.error("first client failed taking snapshot", e); - fail("first client failed taking snapshot"); - } - } - }; - first.start(); - Thread.sleep(1000); - TEST_UTIL.getAdmin().snapshot(snapshot); - } - - @Test - public void testTakeZkCoordinatedSnapshotAndProcedureCoordinatedSnapshotBoth() throws Exception { - String newSnapshotName = SNAPSHOT_NAME + "_2"; - Thread first = new Thread("procedure-snapshot") { - @Override - public void run() { - try { - TEST_UTIL.getAdmin().snapshot(snapshot); - } catch (IOException e) { - LOG.error("procedure snapshot failed", e); - fail("procedure snapshot failed"); - } - } - }; - first.start(); - Thread.sleep(1000); - - SnapshotManager sm = master.getSnapshotManager(); - TEST_UTIL.waitFor(2000, 50, () -> !sm.isTakingSnapshot(TABLE_NAME) - && sm.isTableTakingAnySnapshot(TABLE_NAME)); - - TEST_UTIL.getConfiguration().setBoolean("hbase.snapshot.zk.coordinated", true); - SnapshotDescription snapshotOnSameTable = - new SnapshotDescription(newSnapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH); - SnapshotProtos.SnapshotDescription snapshotOnSameTableProto = ProtobufUtil - .createHBaseProtosSnapshotDesc(snapshotOnSameTable); - Thread second = new Thread("zk-snapshot") { - @Override - public void run() { - try { - master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto); - } catch (IOException e) { - LOG.error("zk snapshot failed", e); - fail("zk snapshot failed"); - } - } - }; - second.start(); - - TEST_UTIL.waitFor(2000, () -> sm.isTakingSnapshot(TABLE_NAME)); - TEST_UTIL.waitFor(60000, () -> sm.isSnapshotDone(snapshotOnSameTableProto) - && !sm.isTakingAnySnapshot()); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, CF); - } - - @Test - public void testRunningTwoSnapshotProcedureOnSameTable() throws Exception { - String newSnapshotName = SNAPSHOT_NAME + "_2"; - SnapshotProtos.SnapshotDescription snapshotProto2 = SnapshotProtos.SnapshotDescription - .newBuilder(snapshotProto).setName(newSnapshotName).build(); - - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - MasterProcedureEnv env = procExec.getEnvironment(); - - SnapshotProcedure sp1 = new SnapshotProcedure(env, snapshotProto); - SnapshotProcedure sp2 = new SnapshotProcedure(env, snapshotProto2); - SnapshotProcedure spySp1 = getDelayedOnSpecificStateSnapshotProcedure(sp1, - procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); - SnapshotProcedure spySp2 = getDelayedOnSpecificStateSnapshotProcedure(sp2, - procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); - - long procId1 = procExec.submitProcedure(spySp1); - long procId2 = procExec.submitProcedure(spySp2); - TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures() - .stream().map(Procedure::getProcId).collect(Collectors.toList()) - .containsAll(Arrays.asList(procId1, procId2))); - - assertFalse(procExec.isFinished(procId1)); - assertFalse(procExec.isFinished(procId2)); - - ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId1); - ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId2); - - List snapshots = - master.getSnapshotManager().getCompletedSnapshots(); - assertEquals(2, snapshots.size()); - snapshots.sort(Comparator.comparing(SnapshotProtos.SnapshotDescription::getName)); - assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName()); - assertEquals(newSnapshotName, snapshots.get(1).getName()); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto2, TABLE_NAME, CF); - } - - @Test - public void testTableInMergeWhileTakingSnapshot() throws Exception { - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - List regions = master.getAssignmentManager().getTableRegions(TABLE_NAME, true) - .stream().sorted(RegionInfo.COMPARATOR).collect(Collectors.toList()); - MergeTableRegionsProcedure mergeProc = new MergeTableRegionsProcedure( - procExec.getEnvironment(), new RegionInfo[] {regions.get(0), regions.get(1)}, false); - long mergeProcId = procExec.submitProcedure(mergeProc); - // wait until merge region procedure running - TEST_UTIL.waitFor(10000, () -> - procExec.getProcedure(mergeProcId).getState() == ProcedureState.RUNNABLE); - SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto); - long snapshotProcId = procExec.submitProcedure(sp); - TEST_UTIL.waitFor(2000, 1000, () -> procExec.getProcedure(snapshotProcId) != null && - procExec.getProcedure(snapshotProcId).getState() == ProcedureState.WAITING_TIMEOUT); - ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId); - SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); - } - - @Test - public void testSnapshotCorruptedAndRollback() throws Exception { - ProcedureExecutor procExec = master.getMasterProcedureExecutor(); - SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto); - procExec.submitProcedure(sp); - TEST_UTIL.waitFor(60000, 500, () -> sp.getCurrentStateId() > - SnapshotState.SNAPSHOT_CONSOLIDATE_SNAPSHOT_VALUE); - DistributedFileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem(); - Optional region = TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME).stream() - .filter(r -> !r.getStoreFileList(new byte[][] { CF }).isEmpty()) - .findFirst(); - assertTrue(region.isPresent()); - region.get().getStoreFileList(new byte[][] { CF }).forEach(s -> { - try { - // delete real data files to trigger the CorruptedSnapshotException - dfs.delete(new Path(s), true); - LOG.info("delete {} to make snapshot corrupt", s); - } catch (Exception e) { - LOG.warn("Failed delete {} to make snapshot corrupt", s, e); - } - } - ); - TEST_UTIL.waitFor(60000, () -> sp.isFailed() && sp.isFinished()); - Configuration conf = master.getConfiguration(); - Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir( - snapshotProto, CommonFSUtils.getRootDir(conf), conf); - assertFalse(dfs.exists(workingDir)); - assertFalse(master.getSnapshotManager().isTakingSnapshot(TABLE_NAME)); - assertFalse(master.getSnapshotManager().isTakingAnySnapshot()); - } - - - private SnapshotProcedure getDelayedOnSpecificStateSnapshotProcedure( + protected SnapshotProcedure getDelayedOnSpecificStateSnapshotProcedure( SnapshotProcedure sp, MasterProcedureEnv env, SnapshotState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { SnapshotProcedure spySp = Mockito.spy(sp); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureBasicSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureBasicSnapshot.java new file mode 100644 index 000000000000..8e5c0f8e5c25 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureBasicSnapshot.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.fail; + +import java.io.IOException; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureBasicSnapshot extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureBasicSnapshot.class); + + @Test + public void testSimpleSnapshotTable() throws Exception { + TEST_UTIL.getAdmin().snapshot(snapshot); + SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + } + + @Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class) + public void testClientTakingTwoSnapshotOnSameTable() throws Exception { + Thread first = new Thread("first-client") { + @Override + public void run() { + try { + TEST_UTIL.getAdmin().snapshot(snapshot); + } catch (IOException e) { + LOG.error("first client failed taking snapshot", e); + fail("first client failed taking snapshot"); + } + } + }; + first.start(); + Thread.sleep(1000); + // we don't allow different snapshot with same name + SnapshotDescription snapshotWithSameName = + new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, SnapshotType.SKIPFLUSH); + TEST_UTIL.getAdmin().snapshot(snapshotWithSameName); + } + + @Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class) + public void testClientTakeSameSnapshotTwice() throws IOException, InterruptedException { + Thread first = new Thread("first-client") { + @Override + public void run() { + try { + TEST_UTIL.getAdmin().snapshot(snapshot); + } catch (IOException e) { + LOG.error("first client failed taking snapshot", e); + fail("first client failed taking snapshot"); + } + } + }; + first.start(); + Thread.sleep(1000); + TEST_UTIL.getAdmin().snapshot(snapshot); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureConcurrently.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureConcurrently.java new file mode 100644 index 000000000000..4f2be1f116fc --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureConcurrently.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureConcurrently extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureConcurrently.class); + + @Test + public void testRunningTwoSnapshotProcedureOnSameTable() throws Exception { + String newSnapshotName = SNAPSHOT_NAME + "_2"; + SnapshotProtos.SnapshotDescription snapshotProto2 = SnapshotProtos.SnapshotDescription + .newBuilder(snapshotProto).setName(newSnapshotName).build(); + + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + MasterProcedureEnv env = procExec.getEnvironment(); + + SnapshotProcedure sp1 = new SnapshotProcedure(env, snapshotProto); + SnapshotProcedure sp2 = new SnapshotProcedure(env, snapshotProto2); + SnapshotProcedure spySp1 = getDelayedOnSpecificStateSnapshotProcedure(sp1, + procExec.getEnvironment(), MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); + SnapshotProcedure spySp2 = getDelayedOnSpecificStateSnapshotProcedure(sp2, + procExec.getEnvironment(), MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); + + long procId1 = procExec.submitProcedure(spySp1); + long procId2 = procExec.submitProcedure(spySp2); + TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures() + .stream().map(Procedure::getProcId).collect(Collectors.toList()) + .containsAll(Arrays.asList(procId1, procId2))); + + assertFalse(procExec.isFinished(procId1)); + assertFalse(procExec.isFinished(procId2)); + + ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId1); + ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId2); + + List snapshots = + master.getSnapshotManager().getCompletedSnapshots(); + assertEquals(2, snapshots.size()); + snapshots.sort(Comparator.comparing(SnapshotProtos.SnapshotDescription::getName)); + assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName()); + assertEquals(newSnapshotName, snapshots.get(1).getName()); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto2, TABLE_NAME, CF); + } + + @Test + public void testTakeZkCoordinatedSnapshotAndProcedureCoordinatedSnapshotBoth() throws Exception { + String newSnapshotName = SNAPSHOT_NAME + "_2"; + Thread first = new Thread("procedure-snapshot") { + @Override + public void run() { + try { + TEST_UTIL.getAdmin().snapshot(snapshot); + } catch (IOException e) { + LOG.error("procedure snapshot failed", e); + fail("procedure snapshot failed"); + } + } + }; + first.start(); + Thread.sleep(1000); + + SnapshotManager sm = master.getSnapshotManager(); + TEST_UTIL.waitFor(2000, 50, () -> !sm.isTakingSnapshot(TABLE_NAME) + && sm.isTableTakingAnySnapshot(TABLE_NAME)); + + TEST_UTIL.getConfiguration().setBoolean("hbase.snapshot.zk.coordinated", true); + SnapshotDescription snapshotOnSameTable = + new SnapshotDescription(newSnapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH); + SnapshotProtos.SnapshotDescription snapshotOnSameTableProto = ProtobufUtil + .createHBaseProtosSnapshotDesc(snapshotOnSameTable); + Thread second = new Thread("zk-snapshot") { + @Override + public void run() { + try { + master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto); + } catch (IOException e) { + LOG.error("zk snapshot failed", e); + fail("zk snapshot failed"); + } + } + }; + second.start(); + + TEST_UTIL.waitFor(2000, () -> sm.isTakingSnapshot(TABLE_NAME)); + TEST_UTIL.waitFor(60000, () -> sm.isSnapshotDone(snapshotOnSameTableProto) + && !sm.isTakingAnySnapshot()); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, CF); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureMasterRestarts.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureMasterRestarts.java new file mode 100644 index 000000000000..b1551fedeb70 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureMasterRestarts.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureMasterRestarts extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureMasterRestarts.class); + + @Test + public void testMasterRestarts() throws Exception { + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + MasterProcedureEnv env = procExec.getEnvironment(); + SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); + SnapshotProcedure spySp = getDelayedOnSpecificStateSnapshotProcedure(sp, + procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS); + + long procId = procExec.submitProcedure(spySp); + + TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures() + .stream().map(Procedure::getProcId).collect(Collectors.toList()).contains(procId)); + TEST_UTIL.getHBaseCluster().killMaster(master.getServerName()); + TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000); + TEST_UTIL.getHBaseCluster().startMaster(); + TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(); + + master = TEST_UTIL.getHBaseCluster().getMaster(); + assertTrue(master.getSnapshotManager().isTakingAnySnapshot()); + assertTrue(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME)); + + List unfinishedProcedures = master + .getMasterProcedureExecutor().getProcedures().stream() + .filter(p -> p instanceof SnapshotProcedure) + .filter(p -> !p.isFinished()).map(p -> (SnapshotProcedure) p) + .collect(Collectors.toList()); + assertEquals(unfinishedProcedures.size(), 1); + long newProcId = unfinishedProcedures.get(0).getProcId(); + assertEquals(procId, newProcId); + + ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), newProcId); + assertFalse(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME)); + + List snapshots + = master.getSnapshotManager().getCompletedSnapshots(); + assertEquals(1, snapshots.size()); + assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName()); + assertEquals(TABLE_NAME, TableName.valueOf(snapshots.get(0).getTable())); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRIT.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRIT.java new file mode 100644 index 000000000000..73f913e791f9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRIT.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureRIT extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureRIT.class); + + @Test + public void testTableInMergeWhileTakingSnapshot() throws Exception { + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + List regions = master.getAssignmentManager().getTableRegions(TABLE_NAME, true) + .stream().sorted(RegionInfo.COMPARATOR).collect(Collectors.toList()); + MergeTableRegionsProcedure mergeProc = new MergeTableRegionsProcedure( + procExec.getEnvironment(), new RegionInfo[] {regions.get(0), regions.get(1)}, false); + long mergeProcId = procExec.submitProcedure(mergeProc); + // wait until merge region procedure running + TEST_UTIL.waitFor(10000, () -> + procExec.getProcedure(mergeProcId).getState() == ProcedureState.RUNNABLE); + SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto); + long snapshotProcId = procExec.submitProcedure(sp); + TEST_UTIL.waitFor(2000, 1000, () -> procExec.getProcedure(snapshotProcId) != null && + procExec.getProcedure(snapshotProcId).getState() == ProcedureState.WAITING_TIMEOUT); + ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRSCrashes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRSCrashes.java new file mode 100644 index 000000000000..084c007c8447 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureRSCrashes.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureRSCrashes extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureRSCrashes.class); + + @Test + public void testRegionServerCrashWhileTakingSnapshot() throws Exception { + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + MasterProcedureEnv env = procExec.getEnvironment(); + SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); + long procId = procExec.submitProcedure(sp); + + SnapshotRegionProcedure snp = waitProcedureRunnableAndGetFirst( + SnapshotRegionProcedure.class, 60000); + ServerName targetServer = env.getAssignmentManager().getRegionStates() + .getRegionStateNode(snp.getRegion()).getRegionLocation(); + TEST_UTIL.getHBaseCluster().killRegionServer(targetServer); + + TEST_UTIL.waitFor(60000, () -> snp.inRetrying()); + ProcedureTestingUtility.waitProcedure(procExec, procId); + + SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + } + + @Test + public void testRegionServerCrashWhileVerifyingSnapshot() throws Exception { + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + MasterProcedureEnv env = procExec.getEnvironment(); + SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto); + long procId = procExec.submitProcedure(sp); + + SnapshotVerifyProcedure svp = waitProcedureRunnableAndGetFirst( + SnapshotVerifyProcedure.class, 60000); + TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null); + ServerName previousTargetServer = svp.getServerName(); + + HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(previousTargetServer); + TEST_UTIL.getHBaseCluster().killRegionServer(rs.getServerName()); + TEST_UTIL.waitFor(60000, () -> svp.getServerName() != null + && !svp.getServerName().equals(previousTargetServer)); + ProcedureTestingUtility.waitProcedure(procExec, procId); + + SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto); + SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureSnapshotCorrupted.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureSnapshotCorrupted.java new file mode 100644 index 000000000000..f177a44e99d9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureSnapshotCorrupted.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Optional; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestSnapshotProcedureSnapshotCorrupted extends TestSnapshotProcedure { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotProcedureSnapshotCorrupted.class); + + @Test + public void testSnapshotCorruptedAndRollback() throws Exception { + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto); + procExec.submitProcedure(sp); + TEST_UTIL.waitFor(60000, 500, () -> sp.getCurrentStateId() > + MasterProcedureProtos.SnapshotState.SNAPSHOT_CONSOLIDATE_SNAPSHOT_VALUE); + DistributedFileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem(); + Optional region = TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME).stream() + .filter(r -> !r.getStoreFileList(new byte[][] { CF }).isEmpty()) + .findFirst(); + assertTrue(region.isPresent()); + region.get().getStoreFileList(new byte[][] { CF }).forEach(s -> { + try { + // delete real data files to trigger the CorruptedSnapshotException + dfs.delete(new Path(s), true); + LOG.info("delete {} to make snapshot corrupt", s); + } catch (Exception e) { + LOG.warn("Failed delete {} to make snapshot corrupt", s, e); + } + } + ); + TEST_UTIL.waitFor(60000, () -> sp.isFailed() && sp.isFinished()); + Configuration conf = master.getConfiguration(); + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir( + snapshotProto, CommonFSUtils.getRootDir(conf), conf); + assertFalse(dfs.exists(workingDir)); + assertFalse(master.getSnapshotManager().isTakingSnapshot(TABLE_NAME)); + assertFalse(master.getSnapshotManager().isTakingAnySnapshot()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java index a19bf4f6a127..907ef9178e2c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java @@ -21,7 +21,6 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import com.google.protobuf.ServiceException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -75,7 +74,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest; @@ -271,33 +269,6 @@ public void storeFile(final RegionInfo regionInfo, final String family, } } - /** - * Helper method for testing async snapshot operations. Just waits for the - * given snapshot to complete on the server by repeatedly checking the master. - * - * @param master the master running the snapshot - * @param snapshot the snapshot to check - * @param sleep amount to sleep between checks to see if the snapshot is done - * @throws ServiceException if the snapshot fails - * @throws org.apache.hbase.thirdparty.com.google.protobuf.ServiceException - */ - public static void waitForSnapshotToComplete(HMaster master, - SnapshotProtos.SnapshotDescription snapshot, long sleep) - throws org.apache.hbase.thirdparty.com.google.protobuf.ServiceException { - final IsSnapshotDoneRequest request = IsSnapshotDoneRequest.newBuilder() - .setSnapshot(snapshot).build(); - IsSnapshotDoneResponse done = IsSnapshotDoneResponse.newBuilder() - .buildPartial(); - while (!done.getDone()) { - done = master.getMasterRpcServices().isSnapshotDone(null, request); - try { - Thread.sleep(sleep); - } catch (InterruptedException e) { - throw new org.apache.hbase.thirdparty.com.google.protobuf.ServiceException(e); - } - } - } - /* * Take snapshot with maximum of numTries attempts, ignoring CorruptedSnapshotException * except for the last CorruptedSnapshotException diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java index 4327d8fac15a..9b65f0ce61d7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java @@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.client.SnapshotDescription; import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; import org.apache.hadoop.hbase.testclassification.LargeTests; @@ -276,12 +275,8 @@ public void testAsyncFlushSnapshot() throws Exception { .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build(); // take the snapshot async - admin.takeSnapshotAsync( - new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)); - - // constantly loop, looking for the snapshot to complete - HMaster master = UTIL.getMiniHBaseCluster().getMaster(); - SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200); + admin.snapshotAsync( + new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)).get(); LOG.info(" === Async Snapshot Completed ==="); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);