From 9a2c74a88a22b7699da3d9eb38bdfa6e9fdfc094 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 6 Jan 2025 14:34:50 -0800 Subject: [PATCH] Revert "HBASE-28836 Parallize the file archival to improve the split times (#6483)" This reverts commit 262c5bb767618074dd22b074aee19670d81c0884. --- .../hadoop/hbase/backup/HFileArchiver.java | 86 +++++++------------ .../procedure/DeleteTableProcedure.java | 3 +- .../hbase/regionserver/HRegionFileSystem.java | 2 +- .../hbase/backup/TestHFileArchiving.java | 12 +-- 4 files changed, 37 insertions(+), 66 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java index 389ad66d45c2..b2ea9cd33a0b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java @@ -23,9 +23,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; @@ -99,7 +97,7 @@ public static boolean exists(Configuration conf, FileSystem fs, RegionInfo info) public static void archiveRegion(Configuration conf, FileSystem fs, RegionInfo info) throws IOException { Path rootDir = CommonFSUtils.getRootDir(conf); - archiveRegion(conf, fs, rootDir, CommonFSUtils.getTableDir(rootDir, info.getTable()), + archiveRegion(fs, rootDir, CommonFSUtils.getTableDir(rootDir, info.getTable()), FSUtils.getRegionDirFromRootDir(rootDir, info)); } @@ -115,8 +113,8 @@ public static void archiveRegion(Configuration conf, FileSystem fs, RegionInfo i * operations could not complete. * @throws IOException if the request cannot be completed */ - public static boolean archiveRegion(final Configuration conf, FileSystem fs, Path rootdir, - Path tableDir, Path regionDir) throws IOException { + public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir) + throws IOException { // otherwise, we archive the files // make sure we can archive if (tableDir == null || regionDir == null) { @@ -159,8 +157,8 @@ public boolean accept(Path file) { // convert the files in the region to a File Stream.of(storeDirs).map(getAsFile).forEachOrdered(toArchive::add); LOG.debug("Archiving " + toArchive); - List failedArchive = resolveAndArchive(conf, fs, regionArchiveDir, toArchive, - EnvironmentEdgeManager.currentTime()); + List failedArchive = + resolveAndArchive(fs, regionArchiveDir, toArchive, EnvironmentEdgeManager.currentTime()); if (!failedArchive.isEmpty()) { throw new FailedArchiveException( "Failed to archive/delete all the files for region:" + regionDir.getName() + " into " @@ -188,7 +186,7 @@ public static void archiveRegions(Configuration conf, FileSystem fs, Path rootDi List> futures = new ArrayList<>(regionDirList.size()); for (Path regionDir : regionDirList) { Future future = getArchiveExecutor(conf).submit(() -> { - archiveRegion(conf, fs, rootDir, tableDir, regionDir); + archiveRegion(fs, rootDir, tableDir, regionDir); return null; }); futures.add(future); @@ -260,8 +258,8 @@ public static void archiveFamily(FileSystem fs, Configuration conf, RegionInfo p * @param family the family hosting the store files * @throws IOException if the files could not be correctly disposed. */ - public static void archiveFamilyByFamilyDir(FileSystem fs, final Configuration conf, - RegionInfo parent, Path familyDir, byte[] family) throws IOException { + public static void archiveFamilyByFamilyDir(FileSystem fs, Configuration conf, RegionInfo parent, + Path familyDir, byte[] family) throws IOException { FileStatus[] storeFiles = CommonFSUtils.listStatus(fs, familyDir); if (storeFiles == null) { LOG.debug("No files to dispose of in {}, family={}", parent.getRegionNameAsString(), @@ -275,7 +273,7 @@ public static void archiveFamilyByFamilyDir(FileSystem fs, final Configuration c // do the actual archive List failedArchive = - resolveAndArchive(conf, fs, storeArchiveDir, toArchive, EnvironmentEdgeManager.currentTime()); + resolveAndArchive(fs, storeArchiveDir, toArchive, EnvironmentEdgeManager.currentTime()); if (!failedArchive.isEmpty()) { throw new FailedArchiveException( "Failed to archive/delete all the files for region:" @@ -295,11 +293,10 @@ public static void archiveFamilyByFamilyDir(FileSystem fs, final Configuration c * attempted; otherwise likely to cause an {@link IOException} * @throws IOException if the files could not be correctly disposed. */ - public static void archiveStoreFiles(final Configuration conf, FileSystem fs, - RegionInfo regionInfo, Path tableDir, byte[] family, Collection compactedFiles) - throws IOException { + public static void archiveStoreFiles(Configuration conf, FileSystem fs, RegionInfo regionInfo, + Path tableDir, byte[] family, Collection compactedFiles) throws IOException { Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, regionInfo, tableDir, family); - archive(conf, fs, regionInfo, family, compactedFiles, storeArchiveDir); + archive(fs, regionInfo, family, compactedFiles, storeArchiveDir); } /** @@ -330,11 +327,11 @@ public static void archiveRecoveredEdits(Configuration conf, FileSystem fs, Regi "Wrong file system! Should be " + path.toUri().getScheme() + ", but got " + fs.getScheme()); } path = HFileArchiveUtil.getStoreArchivePathForRootDir(path, regionInfo, family); - archive(conf, fs, regionInfo, family, replayedEdits, path); + archive(fs, regionInfo, family, replayedEdits, path); } - private static void archive(final Configuration conf, FileSystem fs, RegionInfo regionInfo, - byte[] family, Collection compactedFiles, Path storeArchiveDir) throws IOException { + private static void archive(FileSystem fs, RegionInfo regionInfo, byte[] family, + Collection compactedFiles, Path storeArchiveDir) throws IOException { // sometimes in testing, we don't have rss, so we need to check for that if (fs == null) { LOG.warn( @@ -368,8 +365,8 @@ private static void archive(final Configuration conf, FileSystem fs, RegionInfo compactedFiles.stream().map(getStorePath).collect(Collectors.toList()); // do the actual archive - List failedArchive = resolveAndArchive(conf, fs, storeArchiveDir, storeFiles, - EnvironmentEdgeManager.currentTime()); + List failedArchive = + resolveAndArchive(fs, storeArchiveDir, storeFiles, EnvironmentEdgeManager.currentTime()); if (!failedArchive.isEmpty()) { throw new FailedArchiveException( @@ -422,8 +419,8 @@ public static void archiveStoreFile(Configuration conf, FileSystem fs, RegionInf * @return the list of failed to archive files. * @throws IOException if an unexpected file operation exception occurred */ - private static List resolveAndArchive(final Configuration conf, FileSystem fs, - Path baseArchiveDir, Collection toArchive, long start) throws IOException { + private static List resolveAndArchive(FileSystem fs, Path baseArchiveDir, + Collection toArchive, long start) throws IOException { // short circuit if no files to move if (toArchive.isEmpty()) { return Collections.emptyList(); @@ -440,54 +437,33 @@ private static List resolveAndArchive(final Configuration conf, FileSystem LOG.trace("Created archive directory {}", baseArchiveDir); } - List failures = Collections.synchronizedList(new ArrayList<>()); + List failures = new ArrayList<>(); String startTime = Long.toString(start); - List filesOnly = new ArrayList<>(); for (File file : toArchive) { + // if its a file archive it try { - if (!file.isFile()) { - // if its a directory and we need to archive all files + LOG.trace("Archiving {}", file); + if (file.isFile()) { + // attempt to archive the file + if (!resolveAndArchiveFile(baseArchiveDir, file, startTime)) { + LOG.warn("Couldn't archive " + file + " into backup directory: " + baseArchiveDir); + failures.add(file); + } + } else { + // otherwise its a directory and we need to archive all files LOG.trace("{} is a directory, archiving children files", file); // so we add the directory name to the one base archive Path parentArchiveDir = new Path(baseArchiveDir, file.getName()); // and then get all the files from that directory and attempt to // archive those too Collection children = file.getChildren(); - failures.addAll(resolveAndArchive(conf, fs, parentArchiveDir, children, start)); - } else { - filesOnly.add(file); + failures.addAll(resolveAndArchive(fs, parentArchiveDir, children, start)); } } catch (IOException e) { LOG.warn("Failed to archive {}", file, e); failures.add(file); } } - Map> futures = new HashMap<>(); - // In current baseDir all files will be processed concurrently - for (File file : filesOnly) { - LOG.trace("Archiving {}", file); - Future archiveTask = getArchiveExecutor(conf) - .submit(() -> resolveAndArchiveFile(baseArchiveDir, file, startTime)); - futures.put(file, archiveTask); - } - - for (Map.Entry> fileFutureEntry : futures.entrySet()) { - try { - boolean fileCleaned = fileFutureEntry.getValue().get(); - if (!fileCleaned) { - LOG.warn("Couldn't archive {} into backup directory: {}", fileFutureEntry.getKey(), - baseArchiveDir); - failures.add(fileFutureEntry.getKey()); - } - } catch (InterruptedException e) { - LOG.warn("HFileArchive Cleanup thread was interrupted"); - failures.add(fileFutureEntry.getKey()); - } catch (ExecutionException e) { - // this is IOException - LOG.warn("Failed to archive {}", fileFutureEntry.getKey(), e); - failures.add(fileFutureEntry.getKey()); - } - } return failures; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index b0c36082f5d2..da6ad90780db 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -319,8 +319,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env, final TableName CommonFSUtils.getTableDir(new Path(mfs.getRootDir(), MobConstants.MOB_DIR_NAME), tableName); Path regionDir = new Path(mobTableDir, MobUtils.getMobRegionInfo(tableName).getEncodedName()); if (fs.exists(regionDir)) { - HFileArchiver.archiveRegion(env.getMasterConfiguration(), fs, mfs.getRootDir(), mobTableDir, - regionDir); + HFileArchiver.archiveRegion(fs, mfs.getRootDir(), mobTableDir, regionDir); } // Delete table directory from FS diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java index f7144c7fa9dd..6fccccfc8203 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java @@ -1059,7 +1059,7 @@ public static void deleteRegionFromFileSystem(final Configuration conf, final Fi // Archive region Path rootDir = CommonFSUtils.getRootDir(conf); - HFileArchiver.archiveRegion(conf, fs, rootDir, tableDir, regionDir); + HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir); // Delete empty region dir if (!fs.delete(regionDir, true)) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java index 12ea6180bbfc..4c7337e59ec7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java @@ -651,8 +651,7 @@ public void testCleaningRace() throws Exception { try { // Try to archive the file - HFileArchiver.archiveRegion(conf, fs, rootDir, sourceRegionDir.getParent(), - sourceRegionDir); + HFileArchiver.archiveRegion(fs, rootDir, sourceRegionDir.getParent(), sourceRegionDir); // The archiver succeded, the file is no longer in the original location // but it's in the archive location. @@ -684,8 +683,7 @@ public void testArchiveRegionTableAndRegionDirsNull() throws IOException { Path rootDir = UTIL.getDataTestDirOnTestFS("testCleaningRace"); FileSystem fileSystem = UTIL.getTestFileSystem(); // Try to archive the file but with null regionDir, can't delete sourceFile - Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration(); - assertFalse(HFileArchiver.archiveRegion(conf, fileSystem, rootDir, null, null)); + assertFalse(HFileArchiver.archiveRegion(fileSystem, rootDir, null, null)); } @Test @@ -694,7 +692,6 @@ public void testArchiveRegionWithTableDirNull() throws IOException { CommonFSUtils.getTableDir(new Path("./"), TableName.valueOf(name.getMethodName())), "xyzabc"); Path familyDir = new Path(regionDir, "rd"); Path rootDir = UTIL.getDataTestDirOnTestFS("testCleaningRace"); - Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration(); Path file = new Path(familyDir, "1"); Path sourceFile = new Path(rootDir, file); FileSystem fileSystem = UTIL.getTestFileSystem(); @@ -702,7 +699,7 @@ public void testArchiveRegionWithTableDirNull() throws IOException { Path sourceRegionDir = new Path(rootDir, regionDir); fileSystem.mkdirs(sourceRegionDir); // Try to archive the file - assertFalse(HFileArchiver.archiveRegion(conf, fileSystem, rootDir, null, sourceRegionDir)); + assertFalse(HFileArchiver.archiveRegion(fileSystem, rootDir, null, sourceRegionDir)); assertFalse(fileSystem.exists(sourceRegionDir)); } @@ -713,7 +710,6 @@ public void testArchiveRegionWithRegionDirNull() throws IOException { "elgn4nf"); Path familyDir = new Path(regionDir, "rdar"); Path rootDir = UTIL.getDataTestDirOnTestFS("testCleaningRace"); - Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration(); Path file = new Path(familyDir, "2"); Path sourceFile = new Path(rootDir, file); FileSystem fileSystem = UTIL.getTestFileSystem(); @@ -722,7 +718,7 @@ public void testArchiveRegionWithRegionDirNull() throws IOException { fileSystem.mkdirs(sourceRegionDir); // Try to archive the file but with null regionDir, can't delete sourceFile assertFalse( - HFileArchiver.archiveRegion(conf, fileSystem, rootDir, sourceRegionDir.getParent(), null)); + HFileArchiver.archiveRegion(fileSystem, rootDir, sourceRegionDir.getParent(), null)); assertTrue(fileSystem.exists(sourceRegionDir)); fileSystem.delete(sourceRegionDir, true); }