From 6a7313b5173530f728b3a3d0296d430c8e376fac Mon Sep 17 00:00:00 2001
From: Ray Mattingly
Date: Thu, 16 May 2024 08:44:51 -0400
Subject: [PATCH] HBASE-26625 ExportSnapshot tool failed to copy data files for
 tables with merge region (#3981) (#93)

Signed-off-by: Duo Zhang
Co-authored-by: meiyi
---
 .../hadoop/hbase/snapshot/ExportSnapshot.java | 37 +++++++-----
 .../hbase/snapshot/TestExportSnapshot.java    | 57 ++++++++++++++++++-
 .../TestExportSnapshotV1NoCluster.java        | 19 ++++++-
 .../hbase/regionserver/StoreFileInfo.java     | 19 +++++++
 4 files changed, 115 insertions(+), 17 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index efacde37b9b7..7fb9c14aa50c 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -51,6 +51,7 @@
 import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
 import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.util.AbstractHBaseTool;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -574,29 +575,39 @@ private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configu
         @Override
         public void storeFile(final RegionInfo regionInfo, final String family,
           final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
-          // for storeFile.hasReference() case, copied as part of the manifest
+          Pair<SnapshotFileInfo, Long> snapshotFileAndSize = null;
           if (!storeFile.hasReference()) {
             String region = regionInfo.getEncodedName();
             String hfile = storeFile.getName();
-            Path path = HFileLink.createPath(table, region, family, hfile);
-
-            SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
-              .setType(SnapshotFileInfo.Type.HFILE).setHfile(path.toString()).build();
-
-            long size;
-            if (storeFile.hasFileSize()) {
-              size = storeFile.getFileSize();
-            } else {
-              size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
-            }
-            files.add(new Pair<>(fileInfo, size));
+            snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table, region, family, hfile,
+              storeFile.hasFileSize() ? storeFile.getFileSize() : -1);
+          } else {
+            Pair<String, String> referredToRegionAndFile =
+              StoreFileInfo.getReferredToRegionAndFile(storeFile.getName());
+            String referencedRegion = referredToRegionAndFile.getFirst();
+            String referencedHFile = referredToRegionAndFile.getSecond();
+            snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table, referencedRegion, family,
+              referencedHFile, storeFile.hasFileSize() ? storeFile.getFileSize() : -1);
           }
+          files.add(snapshotFileAndSize);
         }
       });

     return files;
   }

+  private static Pair<SnapshotFileInfo, Long> getSnapshotFileAndSize(FileSystem fs,
+    Configuration conf, TableName table, String region, String family, String hfile, long size)
+    throws IOException {
+    Path path = HFileLink.createPath(table, region, family, hfile);
+    SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder().setType(SnapshotFileInfo.Type.HFILE)
+      .setHfile(path.toString()).build();
+    if (size == -1) {
+      size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
+    }
+    return new Pair<>(fileInfo, size);
+  }
+
   /**
    * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
    * The groups created will have similar amounts of bytes.
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index a9bd94f713b6..5ce670c0cb0c 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -31,6 +31,7 @@
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -40,13 +41,19 @@
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -60,6 +67,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+
 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;

@@ -162,6 +171,39 @@ public void testExportFileSystemState() throws Exception {
     testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
   }

+  @Test
+  public void testExportFileSystemStateWithMergeRegion() throws Exception {
+    // disable compaction
+    admin.compactionSwitch(false,
+      admin.getRegionServers().stream().map(a -> a.getServerName()).collect(Collectors.toList()));
+    // create Table
+    TableName tableName0 = TableName.valueOf("testtb-" + testName.getMethodName() + "-1");
+    byte[] snapshotName0 = Bytes.toBytes("snaptb0-" + testName.getMethodName() + "-1");
+    admin.createTable(
+      TableDescriptorBuilder.newBuilder(tableName0)
+        .setColumnFamilies(
+          Lists.newArrayList(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build()))
+        .build(),
+      new byte[][] { Bytes.toBytes("2") });
+    // put some data
+    try (Table table = admin.getConnection().getTable(tableName0)) {
+      table.put(new Put(Bytes.toBytes("1")).addColumn(FAMILY, null, Bytes.toBytes("1")));
+      table.put(new Put(Bytes.toBytes("2")).addColumn(FAMILY, null, Bytes.toBytes("2")));
+    }
+    List<RegionInfo> regions = admin.getRegions(tableName0);
+    assertEquals(2, regions.size());
+    tableNumFiles = regions.size();
+    // merge region
+    admin.mergeRegionsAsync(new byte[][] { regions.get(0).getEncodedNameAsBytes(),
+      regions.get(1).getEncodedNameAsBytes() }, true).get();
+    // take a snapshot
+    admin.snapshot(snapshotName0, tableName0);
+    // export snapshot and verify
+    testExportFileSystemState(tableName0, snapshotName0, snapshotName0, tableNumFiles);
+    // delete table
+    TEST_UTIL.deleteTable(tableName0);
+  }
+
   @Test
   public void testExportFileSystemStateWithSkipTmp() throws Exception {
     TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
@@ -330,12 +372,21 @@ public void storeFile(final RegionInfo regionInfo, final String family,
           return;
         }

-        String hfile = storeFile.getName();
-        snapshotFiles.add(hfile);
-        if (!storeFile.hasReference()) {
+        if (!storeFile.hasReference() && !StoreFileInfo.isReference(storeFile.getName())) {
+          String hfile = storeFile.getName();
+          snapshotFiles.add(hfile);
           verifyNonEmptyFile(new Path(exportedArchive,
             new Path(CommonFSUtils.getTableDir(new Path("./"), tableName),
               new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
+        } else {
+          Pair<String, String> referredToRegionAndFile =
+            StoreFileInfo.getReferredToRegionAndFile(storeFile.getName());
+          String region = referredToRegionAndFile.getFirst();
+          String hfile = referredToRegionAndFile.getSecond();
+          snapshotFiles.add(hfile);
+          verifyNonEmptyFile(new Path(exportedArchive,
+            new Path(CommonFSUtils.getTableDir(new Path("./"), tableName),
+              new Path(region, new Path(family, hfile)))));
         }
       }

diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
index 7d27bfea79a0..576834deb8c2 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
@@ -20,6 +20,8 @@
 import static org.junit.Assert.assertTrue;

 import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
@@ -28,11 +30,13 @@
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
 import org.apache.hadoop.hbase.testclassification.MapReduceTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -104,7 +108,20 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
     Path[] r1Files = builder.addRegion();
     Path[] r2Files = builder.addRegion();
     builder.commit();
-    int snapshotFilesCount = r1Files.length + r2Files.length;
+    // remove references, only keep data files
+    Set<String> dataFiles = new HashSet<>();
+    for (Path[] files : new Path[][] { r1Files, r2Files }) {
+      for (Path file : files) {
+        if (StoreFileInfo.isReference(file.getName())) {
+          Pair<String, String> referredToRegionAndFile =
+            StoreFileInfo.getReferredToRegionAndFile(file.getName());
+          dataFiles.add(referredToRegionAndFile.getSecond());
+        } else {
+          dataFiles.add(file.getName());
+        }
+      }
+    }
+    int snapshotFilesCount = dataFiles.size();
     byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
     TableName tableName = builder.getTableDescriptor().getTableName();
     TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
index 28713cee3bcf..307f7b750444 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
@@ -40,6 +40,7 @@
 import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -567,6 +568,24 @@ public static Path getReferredToFile(final Path p) {
       nameStrippedOfSuffix);
   }

+  /*
+   * Return region and file name referred to by a Reference.
+   * @param referenceFile HFile name which is a Reference.
+   * @return Calculated referenced region and file name.
+   * @throws IllegalArgumentException when referenceFile regex fails to match.
+   */
+  public static Pair<String, String> getReferredToRegionAndFile(final String referenceFile) {
+    Matcher m = REF_NAME_PATTERN.matcher(referenceFile);
+    if (m == null || !m.matches()) {
+      LOG.warn("Failed match of store file name {}", referenceFile);
+      throw new IllegalArgumentException("Failed match of store file name " + referenceFile);
+    }
+    String referencedRegion = m.group(2);
+    String referencedFile = m.group(1);
+    LOG.trace("reference {} to region={} file={}", referenceFile, referencedRegion, referencedFile);
+    return new Pair<>(referencedRegion, referencedFile);
+  }
+
   /**
    * Validate the store file name.
    * @param fileName name of the file to validate
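
For reference, and not part of the patch itself: the fix relies on the convention that a reference store file is named "<parent hfile name>.<parent region encoded name>", so getReferredToRegionAndFile() can recover the parent region and data file that ExportSnapshot must copy. The standalone sketch below illustrates that parsing; the class name, the simplified regex (a stand-in for StoreFileInfo's REF_NAME_PATTERN), and the sample file names are illustrative assumptions only.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative sketch only; a simplified stand-in for StoreFileInfo.REF_NAME_PATTERN.
public final class ReferenceNameExample {

  // Reference file names look like "<parent hfile name>.<parent region encoded name>".
  private static final Pattern SIMPLE_REF_PATTERN = Pattern.compile("^([0-9a-f]+)\\.(.+)$");

  /** Returns { referred-to region encoded name, referred-to hfile name }. */
  static String[] referredToRegionAndFile(String referenceFile) {
    Matcher m = SIMPLE_REF_PATTERN.matcher(referenceFile);
    if (!m.matches()) {
      throw new IllegalArgumentException("Not a reference file name: " + referenceFile);
    }
    // group(1) = parent hfile name, group(2) = encoded name of the parent (referred-to) region
    return new String[] { m.group(2), m.group(1) };
  }

  public static void main(String[] args) {
    // Hypothetical names, for illustration only.
    String ref = "0123456789abcdef0123456789abcdef.fd1e73e2a02c41cfbbd10a41bd57e2dd";
    String[] regionAndFile = referredToRegionAndFile(ref);
    System.out.println("region=" + regionAndFile[0] + " file=" + regionAndFile[1]);
  }
}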