Skip to content

Commit

Permalink
HBASE-26625 ExportSnapshot tool failed to copy data files for tables with merge region (apache#3981)
Browse files: browse the repository at this point in the history

Signed-off-by: Duo Zhang <[email protected]>
  • Loading branch information
mymeiyi authored and bbeaudreault committed May 14, 2024
1 parent 867a801 commit 25b1abf
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
Expand Down Expand Up @@ -583,29 +584,39 @@ private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configu
/**
 * Records the data file behind one store file entry of the snapshot manifest.
 * <p>
 * A plain store file is recorded under the region being visited. A reference entry is resolved
 * to the region and hfile it points at, so the referenced data file is the one recorded.
 * Each manifest entry contributes exactly one element to {@code files}.
 * @param regionInfo region whose manifest entry is being visited
 * @param family     column family name of the store file
 * @param storeFile  manifest entry describing the store file
 * @throws IOException if the file length has to be looked up and that lookup fails
 */
@Override
public void storeFile(final RegionInfo regionInfo, final String family,
  final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
  final String region;
  final String hfile;
  if (storeFile.hasReference()) {
    // The entry's name encodes the referenced file and region; use those as the copy target.
    Pair<String, String> referredToRegionAndFile =
      StoreFileInfo.getReferredToRegionAndFile(storeFile.getName());
    region = referredToRegionAndFile.getFirst();
    hfile = referredToRegionAndFile.getSecond();
  } else {
    region = regionInfo.getEncodedName();
    hfile = storeFile.getName();
  }
  // -1 tells the helper that the manifest carries no size and it must be looked up.
  long knownSize = storeFile.hasFileSize() ? storeFile.getFileSize() : -1;
  files.add(getSnapshotFileAndSize(fs, conf, table, region, family, hfile, knownSize));
}
});

return files;
}

/**
 * Builds the export descriptor of one hfile, paired with the file's length in bytes.
 * <p>
 * The descriptor is a {@code SnapshotFileInfo} of type {@code HFILE} whose hfile field is the
 * string form of the HFileLink path created from the table, region, family and hfile name.
 * @param fs     file system used to look up the length when {@code size} is -1
 * @param conf   configuration used to build the HFileLink for the length lookup
 * @param table  table the file belongs to
 * @param region encoded name of the region holding the file
 * @param family column family name
 * @param hfile  hfile name
 * @param size   length of the file, or -1 if it has to be read from the file status
 * @return pair of the file descriptor and the file length
 * @throws IOException if the length lookup fails
 */
private static Pair<SnapshotFileInfo, Long> getSnapshotFileAndSize(FileSystem fs,
  Configuration conf, TableName table, String region, String family, String hfile, long size)
  throws IOException {
  final Path linkPath = HFileLink.createPath(table, region, family, hfile);
  final SnapshotFileInfo descriptor = SnapshotFileInfo.newBuilder()
    .setType(SnapshotFileInfo.Type.HFILE)
    .setHfile(linkPath.toString())
    .build();
  if (size != -1) {
    return new Pair<>(descriptor, size);
  }
  // No size supplied by the caller: resolve the link and read the length from its file status.
  long resolvedLength =
    HFileLink.buildFromHFileLinkPattern(conf, linkPath).getFileStatus(fs).getLen();
  return new Pair<>(descriptor, resolvedLength);
}

/**
* Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
* The groups created will have similar amounts of bytes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
Expand All @@ -40,13 +41,19 @@
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
Expand All @@ -60,6 +67,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;

Expand Down Expand Up @@ -162,6 +171,39 @@ public void testExportFileSystemState() throws Exception {
testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
}

/**
 * Exports a snapshot taken right after merging the two regions of a table and verifies the
 * exported file system state. Compaction is switched off for the duration of the test; the
 * table is removed and compaction switched back on even when the test fails.
 */
@Test
public void testExportFileSystemStateWithMergeRegion() throws Exception {
  // disable compaction
  List<String> serverNames =
    admin.getRegionServers().stream().map(a -> a.getServerName()).collect(Collectors.toList());
  admin.compactionSwitch(false, serverNames);
  try {
    // create Table
    TableName tableName0 = TableName.valueOf("testtb-" + testName.getMethodName() + "-1");
    byte[] snapshotName0 = Bytes.toBytes("snaptb0-" + testName.getMethodName() + "-1");
    admin.createTable(
      TableDescriptorBuilder.newBuilder(tableName0)
        .setColumnFamilies(
          Lists.newArrayList(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build()))
        .build(),
      new byte[][] { Bytes.toBytes("2") });
    try {
      // put some data: one row on each side of the split key
      try (Table table = admin.getConnection().getTable(tableName0)) {
        table.put(new Put(Bytes.toBytes("1")).addColumn(FAMILY, null, Bytes.toBytes("1")));
        table.put(new Put(Bytes.toBytes("2")).addColumn(FAMILY, null, Bytes.toBytes("2")));
      }
      List<RegionInfo> regions = admin.getRegions(tableName0);
      assertEquals(2, regions.size());
      tableNumFiles = regions.size();
      // merge region
      admin.mergeRegionsAsync(new byte[][] { regions.get(0).getEncodedNameAsBytes(),
        regions.get(1).getEncodedNameAsBytes() }, true).get();
      // take a snapshot
      admin.snapshot(snapshotName0, tableName0);
      // export snapshot and verify
      testExportFileSystemState(tableName0, snapshotName0, snapshotName0, tableNumFiles);
    } finally {
      // delete table, also on failure, so it does not leak into other tests
      TEST_UTIL.deleteTable(tableName0);
    }
  } finally {
    // switch compaction back on for the servers it was switched off on above
    admin.compactionSwitch(true, serverNames);
  }
}

@Test
public void testExportFileSystemStateWithSkipTmp() throws Exception {
TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
Expand Down Expand Up @@ -330,12 +372,21 @@ public void storeFile(final RegionInfo regionInfo, final String family,
return;
}

String hfile = storeFile.getName();
snapshotFiles.add(hfile);
if (!storeFile.hasReference()) {
if (!storeFile.hasReference() && !StoreFileInfo.isReference(storeFile.getName())) {
String hfile = storeFile.getName();
snapshotFiles.add(hfile);
verifyNonEmptyFile(new Path(exportedArchive,
new Path(CommonFSUtils.getTableDir(new Path("./"), tableName),
new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
} else {
Pair<String, String> referredToRegionAndFile =
StoreFileInfo.getReferredToRegionAndFile(storeFile.getName());
String region = referredToRegionAndFile.getFirst();
String hfile = referredToRegionAndFile.getSecond();
snapshotFiles.add(hfile);
verifyNonEmptyFile(new Path(exportedArchive,
new Path(CommonFSUtils.getTableDir(new Path("./"), tableName),
new Path(region, new Path(family, hfile)))));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
Expand All @@ -28,11 +30,13 @@
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
Expand Down Expand Up @@ -104,7 +108,20 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
Path[] r1Files = builder.addRegion();
Path[] r2Files = builder.addRegion();
builder.commit();
int snapshotFilesCount = r1Files.length + r2Files.length;
// remove references, only keep data files
Set<String> dataFiles = new HashSet<>();
for (Path[] files : new Path[][] { r1Files, r2Files }) {
for (Path file : files) {
if (StoreFileInfo.isReference(file.getName())) {
Pair<String, String> referredToRegionAndFile =
StoreFileInfo.getReferredToRegionAndFile(file.getName());
dataFiles.add(referredToRegionAndFile.getSecond());
} else {
dataFiles.add(file.getName());
}
}
}
int snapshotFilesCount = dataFiles.size();
byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
TableName tableName = builder.getTableDescriptor().getTableName();
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -518,6 +519,24 @@ public static Path getReferredToFile(final Path p) {
nameStrippedOfSuffix);
}

/**
 * Return region and file name referred to by a Reference.
 * @param referenceFile HFile name which is a Reference.
 * @return Pair whose first element is the referenced region and whose second element is the
 *         referenced file name.
 * @throws IllegalArgumentException when referenceFile regex fails to match.
 */
public static Pair<String, String> getReferredToRegionAndFile(final String referenceFile) {
  // Pattern#matcher never returns null, so matches() is the only check needed.
  Matcher m = REF_NAME_PATTERN.matcher(referenceFile);
  if (!m.matches()) {
    LOG.warn("Failed match of store file name {}", referenceFile);
    throw new IllegalArgumentException("Failed match of store file name " + referenceFile);
  }
  // Group 1 of the reference name is the file, group 2 the region it lives in.
  String referencedRegion = m.group(2);
  String referencedFile = m.group(1);
  LOG.trace("reference {} to region={} file={}", referenceFile, referencedRegion, referencedFile);
  return new Pair<>(referencedRegion, referencedFile);
}

/**
* Validate the store file name.
* @param fileName name of the file to validate
Expand Down

0 comments on commit 25b1abf

Please sign in to comment.