Skip to content

Commit

Permalink
HubSpot Backport: HBASE-28502 Cleanup old backup manifest logic (apac…
Browse files Browse the repository at this point in the history
…he#5871)

In older versions of HBase's backup mechanism, a manifest
was written per table being backed up. This was since refactored
to one manifest per backup, but the manifest code was not updated.
A concrete issue with the old code was that the manifest
for full backups did not correctly list the tables included
in the backup.

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
Reviewed-by: Ray Mattingly <rmdmattingly@gmail.com>
  • Loading branch information
DieterDP-ng authored and Ray Mattingly committed May 17, 2024
1 parent 6a7313b commit 046f5b9
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 97 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@
package org.apache.hadoop.hbase.backup;

import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.yetus.audience.InterfaceAudience;
Expand All @@ -44,8 +42,8 @@ private HBackupFileSystem() {
}

/**
* Given the backup root dir, backup id and the table name, return the backup image location,
* which is also where the backup manifest file is. return value look like:
* Given the backup root dir, backup id and the table name, return the backup image location.
* Return value look like:
* "hdfs://backup.hbase.org:9000/user/biadmin/backup/backup_1396650096738/default/t1_dn/", where
* "hdfs://backup.hbase.org:9000/user/biadmin/backup" is a backup root directory
* @param backupRootDir backup root directory
Expand Down Expand Up @@ -79,11 +77,6 @@ public static Path getBackupTmpDirPathForBackupId(String backupRoot, String back
return new Path(getBackupTmpDirPath(backupRoot), backupId);
}

public static String getTableBackupDataDir(String backupRootDir, String backupId,
TableName tableName) {
return getTableBackupDir(backupRootDir, backupId, tableName) + Path.SEPARATOR + "data";
}

public static Path getBackupPath(String backupRootDir, String backupId) {
return new Path(backupRootDir + Path.SEPARATOR + backupId);
}
Expand All @@ -102,24 +95,6 @@ public static Path getTableBackupPath(TableName tableName, Path backupRootPath,
return new Path(getTableBackupDir(backupRootPath.toString(), backupId, tableName));
}

/**
* Given the backup root dir and the backup id, return the log file location for an incremental
* backup.
* @param backupRootDir backup root directory
* @param backupId backup id
* @return logBackupDir: ".../user/biadmin/backup/WALs/backup_1396650096738"
*/
public static String getLogBackupDir(String backupRootDir, String backupId) {
return backupRootDir + Path.SEPARATOR + backupId + Path.SEPARATOR
+ HConstants.HREGION_LOGDIR_NAME;
}

public static Path getLogBackupPath(String backupRootDir, String backupId) {
return new Path(getLogBackupDir(backupRootDir, backupId));
}

// TODO we do not keep WAL files anymore
// Move manifest file to other place
private static Path getManifestPath(Configuration conf, Path backupRootPath, String backupId)
throws IOException {
FileSystem fs = backupRootPath.getFileSystem(conf);
Expand All @@ -140,19 +115,4 @@ public static BackupManifest getManifest(Configuration conf, Path backupRootPath
new BackupManifest(conf, getManifestPath(conf, backupRootPath, backupId));
return manifest;
}

/**
* Check whether the backup image path and there is manifest file in the path.
* @param backupManifestMap If all the manifests are found, then they are put into this map
* @param tableArray the tables involved
* @throws IOException exception
*/
public static void checkImageManifestExist(HashMap<TableName, BackupManifest> backupManifestMap,
TableName[] tableArray, Configuration conf, Path backupRootPath, String backupId)
throws IOException {
for (TableName tableName : tableArray) {
BackupManifest manifest = getManifest(conf, backupRootPath, backupId);
backupManifestMap.put(tableName, manifest);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -498,16 +499,15 @@ private String[] toStringArray(TableName[] list) {
@Override
public void restore(RestoreRequest request) throws IOException {
if (request.isCheck()) {
HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>();
// check and load backup image manifest for the tables
Path rootPath = new Path(request.getBackupRootDir());
String backupId = request.getBackupId();
TableName[] sTableArray = request.getFromTables();
HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray,
conn.getConfiguration(), rootPath, backupId);
BackupManifest manifest =
HBackupFileSystem.getManifest(conn.getConfiguration(), rootPath, backupId);

// Check and validate the backup image and its dependencies
if (BackupUtils.validate(backupManifestMap, conn.getConfiguration())) {
if (BackupUtils.validate(Arrays.asList(sTableArray), manifest, conn.getConfiguration())) {
LOG.info(CHECK_OK);
} else {
LOG.error(CHECK_FAILED);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ public List<TableName> getTableList() {
}

/**
* TODO: fix it. Persist the manifest file.
* Persist the manifest file.
* @throws BackupException if an error occurred while storing the manifest file.
*/
public void store(Configuration conf) throws BackupException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.TreeSet;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -203,19 +202,17 @@ private List<Path> getFilesRecursively(String fileBackupDir)

/**
* Restore operation. Stage 2: resolved Backup Image dependency
* @param backupManifestMap : tableName, Manifest
* @param sTableArray The array of tables to be restored
* @param tTableArray The array of mapping tables to restore to
* @param sTableArray The array of tables to be restored
* @param tTableArray The array of mapping tables to restore to
* @throws IOException exception
*/
private void restore(HashMap<TableName, BackupManifest> backupManifestMap,
TableName[] sTableArray, TableName[] tTableArray, boolean isOverwrite) throws IOException {
private void restore(BackupManifest manifest, TableName[] sTableArray, TableName[] tTableArray,
boolean isOverwrite) throws IOException {
TreeSet<BackupImage> restoreImageSet = new TreeSet<>();

for (int i = 0; i < sTableArray.length; i++) {
TableName table = sTableArray[i];

BackupManifest manifest = backupManifestMap.get(table);
// Get the image list of this backup for restore in time order from old
// to new.
List<BackupImage> list = new ArrayList<>();
Expand Down Expand Up @@ -255,12 +252,10 @@ public void execute() throws IOException {
checkTargetTables(tTableArray, isOverwrite);

// case RESTORE_IMAGES:
HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>();
// check and load backup image manifest for the tables
Path rootPath = new Path(backupRootDir);
HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, conf, rootPath,
backupId);
BackupManifest manifest = HBackupFileSystem.getManifest(conf, rootPath, backupId);

restore(backupManifestMap, sTableArray, tTableArray, isOverwrite);
restore(manifest, sTableArray, tTableArray, isOverwrite);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -268,7 +267,7 @@ public static void cleanupAndRestoreBackupSystem(Connection conn, BackupInfo bac
}

/**
* Add manifest for the current backup. The manifest is stored within the table backup directory.
* Creates a manifest based on the provided info, and store it in the backup-specific directory.
* @param backupInfo The current backup info
* @throws IOException exception
*/
Expand All @@ -277,43 +276,16 @@ protected void addManifest(BackupInfo backupInfo, BackupManager backupManager, B
// set the overall backup phase : store manifest
backupInfo.setPhase(BackupPhase.STORE_MANIFEST);

BackupManifest manifest;

// Since we have each table's backup in its own directory structure,
// we'll store its manifest with the table directory.
for (TableName table : backupInfo.getTables()) {
manifest = new BackupManifest(backupInfo, table);
ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo, table);
for (BackupImage image : ancestors) {
manifest.addDependentImage(image);
}

if (type == BackupType.INCREMENTAL) {
// We'll store the log timestamps for this table only in its manifest.
Map<TableName, Map<String, Long>> tableTimestampMap = new HashMap<>();
tableTimestampMap.put(table, backupInfo.getIncrTimestampMap().get(table));
manifest.setIncrTimestampMap(tableTimestampMap);
ArrayList<BackupImage> ancestorss = backupManager.getAncestors(backupInfo);
for (BackupImage image : ancestorss) {
manifest.addDependentImage(image);
}
}
manifest.store(conf);
}

// For incremental backup, we store a overall manifest in
// <backup-root-dir>/WALs/<backup-id>
// This is used when created the next incremental backup
BackupManifest manifest = new BackupManifest(backupInfo);
if (type == BackupType.INCREMENTAL) {
manifest = new BackupManifest(backupInfo);
// set the table region server start and end timestamps for incremental backup
manifest.setIncrTimestampMap(backupInfo.getIncrTimestampMap());
ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
for (BackupImage image : ancestors) {
manifest.addDependentImage(image);
}
manifest.store(conf);
}
ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
for (BackupImage image : ancestors) {
manifest.addDependentImage(image);
}
manifest.store(conf);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -663,15 +663,14 @@ public static RestoreRequest createRestoreRequest(String backupRootDir, String b
return request;
}

public static boolean validate(HashMap<TableName, BackupManifest> backupManifestMap,
public static boolean validate(List<TableName> tables, BackupManifest backupManifest,
Configuration conf) throws IOException {
boolean isValid = true;

for (Entry<TableName, BackupManifest> manifestEntry : backupManifestMap.entrySet()) {
TableName table = manifestEntry.getKey();
for (TableName table : tables) {
TreeSet<BackupImage> imageSet = new TreeSet<>();

ArrayList<BackupImage> depList = manifestEntry.getValue().getDependentListByTable(table);
ArrayList<BackupImage> depList = backupManifest.getDependentListByTable(table);
if (depList != null && !depList.isEmpty()) {
imageSet.addAll(depList);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@
*/
package org.apache.hadoop.hbase.backup;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.util.HashSet;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.util.ToolRunner;
Expand All @@ -30,6 +34,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

@Category(LargeTests.class)
public class TestFullBackup extends TestBackupBase {

Expand All @@ -56,6 +62,11 @@ public void testFullBackupMultipleCommand() throws Exception {
String backupId = data.getBackupId();
assertTrue(checkSucceeded(backupId));
}

BackupInfo newestBackup = backups.get(0);
BackupManifest manifest =
HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), newestBackup.getBackupId());
assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList()));
}
LOG.info("backup complete");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,19 @@
*/
package org.apache.hadoop.hbase.backup;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
Expand All @@ -49,6 +53,7 @@
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

@Category(LargeTests.class)
@RunWith(Parameterized.class)
Expand Down Expand Up @@ -145,6 +150,9 @@ public void TestIncBackupRestore() throws Exception {
request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
String backupIdIncMultiple = client.backupTables(request);
assertTrue(checkSucceeded(backupIdIncMultiple));
BackupManifest manifest =
HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), backupIdIncMultiple);
assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList()));

// add column family f2 to table1
// drop column family f3
Expand Down

0 comments on commit 046f5b9

Please sign in to comment.