diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java index 42881be5ff4e..17086579aa1c 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.mob.MobUtils; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.util.AbstractHBaseTool; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -584,31 +585,39 @@ private static List> getSnapshotFiles(final Configu @Override public void storeFile(final RegionInfo regionInfo, final String family, final SnapshotRegionManifest.StoreFile storeFile) throws IOException { - // for storeFile.hasReference() case, copied as part of the manifest + Pair snapshotFileAndSize = null; if (!storeFile.hasReference()) { String region = regionInfo.getEncodedName(); String hfile = storeFile.getName(); - Path path = HFileLink.createPath(table, region, family, hfile); - - SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder() - .setType(SnapshotFileInfo.Type.HFILE) - .setHfile(path.toString()) - .build(); - - long size; - if (storeFile.hasFileSize()) { - size = storeFile.getFileSize(); - } else { - size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen(); - } - files.add(new Pair<>(fileInfo, size)); + snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table, region, family, hfile, + storeFile.hasFileSize() ? storeFile.getFileSize() : -1); + } else { + Pair referredToRegionAndFile = + StoreFileInfo.getReferredToRegionAndFile(storeFile.getName()); + String referencedRegion = referredToRegionAndFile.getFirst(); + String referencedHFile = referredToRegionAndFile.getSecond(); + snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table, referencedRegion, family, + referencedHFile, storeFile.hasFileSize() ? storeFile.getFileSize() : -1); } + files.add(snapshotFileAndSize); } - }); + }); return files; } + private static Pair getSnapshotFileAndSize(FileSystem fs, + Configuration conf, TableName table, String region, String family, String hfile, long size) + throws IOException { + Path path = HFileLink.createPath(table, region, family, hfile); + SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder().setType(SnapshotFileInfo.Type.HFILE) + .setHfile(path.toString()).build(); + if (size == -1) { + size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen(); + } + return new Pair<>(fileInfo, size); + } + /** * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible. * The groups created will have similar amounts of bytes. diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java index d488b070af2e..02aae1b341bd 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -37,13 +38,19 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -59,6 +66,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest; +import org.apache.hbase.thirdparty.com.google.common.collect.Lists; /** * Test Export Snapshot Tool @@ -159,6 +167,40 @@ public void testExportFileSystemState() throws Exception { testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles); } + @Test + public void testExportFileSystemStateWithMergeRegion() throws Exception { + // disable compaction + admin.compactionSwitch(false, + admin.getRegionServers().stream().map(a -> a.getServerName()).collect(Collectors.toList())); + // create Table + TableName tableName0 = TableName.valueOf("testtb-" + testName.getMethodName() + "-1"); + String snapshotName0 = "snaptb0-" + testName.getMethodName() + "-1"; + admin.createTable( + TableDescriptorBuilder.newBuilder(tableName0) + .setColumnFamilies( + Lists.newArrayList(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build())) + .build(), + new byte[][] { Bytes.toBytes("2") }); + // put some data + try (Table table = admin.getConnection().getTable(tableName0)) { + table.put(new Put(Bytes.toBytes("1")).addColumn(FAMILY, null, Bytes.toBytes("1"))); + table.put(new Put(Bytes.toBytes("2")).addColumn(FAMILY, null, Bytes.toBytes("2"))); + } + List regions = admin.getRegions(tableName0); + assertEquals(2, regions.size()); + tableNumFiles = regions.size(); + // merge region + admin.mergeRegionsAsync(new byte[][] { regions.get(0).getEncodedNameAsBytes(), + regions.get(1).getEncodedNameAsBytes() }, + true).get(); + // take a snapshot + admin.snapshot(snapshotName0, tableName0); + // export snapshot and verify + testExportFileSystemState(tableName0, snapshotName0, snapshotName0, tableNumFiles); + // delete table + TEST_UTIL.deleteTable(tableName0); + } + @Test public void testExportFileSystemStateWithSkipTmp() throws Exception { TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true); @@ -285,12 +327,21 @@ public void storeFile(final RegionInfo regionInfo, final String family, return; } - String hfile = storeFile.getName(); - snapshotFiles.add(hfile); - if (!storeFile.hasReference()) { + if (!storeFile.hasReference() && !StoreFileInfo.isReference(storeFile.getName())) { + String hfile = storeFile.getName(); + snapshotFiles.add(hfile); verifyNonEmptyFile(new Path(exportedArchive, new Path(CommonFSUtils.getTableDir(new Path("./"), tableName), new Path(regionInfo.getEncodedName(), new Path(family, hfile))))); + } else { + Pair referredToRegionAndFile = + StoreFileInfo.getReferredToRegionAndFile(storeFile.getName()); + String region = referredToRegionAndFile.getFirst(); + String hfile = referredToRegionAndFile.getSecond(); + snapshotFiles.add(hfile); + verifyNonEmptyFile(new Path(exportedArchive, + new Path(CommonFSUtils.getTableDir(new Path("./"), tableName), + new Path(region, new Path(family, hfile))))); } } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java index 94b737c77f7d..dd5ed0cc9655 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java @@ -20,6 +20,8 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.util.HashSet; +import java.util.Set; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; @@ -28,10 +30,12 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock; import org.apache.hadoop.hbase.testclassification.MapReduceTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; @@ -104,7 +108,20 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs, Path[] r1Files = builder.addRegion(); Path[] r2Files = builder.addRegion(); builder.commit(); - int snapshotFilesCount = r1Files.length + r2Files.length; + // remove references, only keep data files + Set dataFiles = new HashSet<>(); + for (Path[] files: new Path[][]{r1Files, r2Files}) { + for (Path file : files) { + if (StoreFileInfo.isReference(file.getName())) { + Pair referredToRegionAndFile = + StoreFileInfo.getReferredToRegionAndFile(file.getName()); + dataFiles.add(referredToRegionAndFile.getSecond()); + } else { + dataFiles.add(file.getName()); + } + } + } + int snapshotFilesCount = dataFiles.size(); String snapshotName = builder.getSnapshotDescription().getName(); TableName tableName = builder.getTableDescriptor().getTableName(); TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java index 7445f4787ed4..263843249127 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -528,6 +529,24 @@ public static Path getReferredToFile(final Path p) { nameStrippedOfSuffix); } + /* + * Return region and file name referred to by a Reference. + * @param referenceFile HFile name which is a Reference. + * @return Calculated referenced region and file name. + * @throws IllegalArgumentException when referenceFile regex fails to match. + */ + public static Pair getReferredToRegionAndFile(final String referenceFile) { + Matcher m = REF_NAME_PATTERN.matcher(referenceFile); + if (m == null || !m.matches()) { + LOG.warn("Failed match of store file name {}", referenceFile); + throw new IllegalArgumentException("Failed match of store file name " + referenceFile); + } + String referencedRegion = m.group(2); + String referencedFile = m.group(1); + LOG.trace("reference {} to region={} file={}", referenceFile, referencedRegion, referencedFile); + return new Pair<>(referencedRegion, referencedFile); + } + /** * Validate the store file name. * @param fileName name of the file to validate