Commit 4c7da49

HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3441) (#3417)

Signed-off-by: Michael Stack <[email protected]>
virajjasani authored Jul 1, 2021
1 parent 147b030 commit 4c7da49
Showing 3 changed files with 139 additions and 18 deletions.
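
At a high level, the change detects a pre-2.0 hbase:meta that lacks the table and rep_barrier column families, tolerates the resulting schema-service failure during init, adds the missing families once the master is set to 'initialized', and then forces a master restart. A condensed sketch of that flow, paraphrasing the diff below (not the literal committed code):

    // Paraphrased upgrade flow from this commit (sketch, not verbatim):
    TableDescriptor meta = tableDescriptors.get(TableName.META_TABLE_NAME);
    boolean missingCFs = meta.getColumnFamily(HConstants.TABLE_FAMILY) == null
      && meta.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY) == null;
    // ... master init continues; a NoSuchColumnFamilyException from the
    // cluster schema service is swallowed when missingCFs is true ...
    if (missingCFs) {
      createMissingCFsInMetaDuringUpgrade(meta); // schedules a modify-table procedure
      throw new PleaseRestartMasterException("..."); // restart into a clean init
    }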
PleaseRestartMasterException.java (new file)
@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Thrown if the master requires restart.
+ */
+@InterfaceAudience.Public
+public class PleaseRestartMasterException extends HBaseIOException {
+
+  public PleaseRestartMasterException(final String s) {
+    super(s);
+  }
+
+}
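
The new exception extends HBaseIOException, so it propagates out of finishActiveMasterInitialization like any other startup failure. A minimal sketch of the intended handling, assuming the usual catch-all in the active-master startup path (the catch site is not part of this diff):

    // Hypothetical catch site (assumption, not in this patch): startup code
    // treats the exception as an abort; process supervision or a backup
    // master then brings up a fresh active master.
    try {
      finishActiveMasterInitialization(status);
    } catch (Throwable t) {
      // PleaseRestartMasterException lands here along with other failures.
      abort("Unhandled exception. Starting shutdown.", t);
    }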
HMaster.java
@@ -67,6 +67,7 @@
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.PleaseRestartMasterException;
 import org.apache.hadoop.hbase.RegionMetrics;
 import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
 import org.apache.hadoop.hbase.ServerMetrics;
@@ -176,6 +177,7 @@
 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
 import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
@@ -196,6 +198,7 @@
 import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FutureUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -965,6 +968,14 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOException {
     if (!waitForMetaOnline()) {
       return;
     }
+
+    TableDescriptor metaDescriptor =
+      tableDescriptors.get(TableName.META_TABLE_NAME);
+    final ColumnFamilyDescriptor tableFamilyDesc =
+      metaDescriptor.getColumnFamily(HConstants.TABLE_FAMILY);
+    final ColumnFamilyDescriptor replBarrierFamilyDesc =
+      metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
+
     this.assignmentManager.joinCluster();
     // The below depends on hbase:meta being online.
     this.assignmentManager.processOfflineRegions();
@@ -1032,7 +1043,17 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOException {
       return;
     }
     status.setStatus("Starting cluster schema service");
-    initClusterSchemaService();
+    try {
+      initClusterSchemaService();
+    } catch (IllegalStateException e) {
+      if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
+          && tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+        LOG.info("ClusterSchema service could not be initialized. This is "
+          + "expected during HBase 1 to 2 upgrade", e);
+      } else {
+        throw e;
+      }
+    }

     if (this.cpHost != null) {
       try {
@@ -1054,6 +1075,29 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOException {
     // Set master as 'initialized'.
     setInitialized(true);
 
+    if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+      // Create missing CFs in meta table after master is set to 'initialized'.
+      createMissingCFsInMetaDuringUpgrade(metaDescriptor);
+
+      // Throwing this exception to abort the active master is painful, but it
+      // seems to be the only way to add the missing CFs to meta while upgrading
+      // from HBase 1 to 2 (where HBase 2 has HBASE-23055 and HBASE-23782
+      // checked in). So why abort the active master after adding the missing
+      // CFs? By the time we reach this point, we have already bypassed the
+      // NoSuchColumnFamilyException in initClusterSchemaService(), meaning
+      // ClusterSchemaService was never correctly initialized; we bypassed
+      // tableStateManager.start() in the same way. Our main task - adding the
+      // missing CFs to meta, which is possible only after the master state is
+      // set to 'initialized' - is done, but at the expense of skipping a few
+      // important tasks in the active master init routine. So we abort the
+      // active master now, so that the next active master init will not face
+      // any issues and all mandatory services will be started during the init
+      // phase.
+      throw new PleaseRestartMasterException("Aborting active master after missing"
+        + " CFs are successfully added in meta. Subsequent active master "
+        + "initialization should be uninterrupted");
+    }
+
     if (maintenanceMode) {
       LOG.info("Detected repair mode, skipping final initialization steps.");
       return;
@@ -1113,6 +1157,38 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOException {
     }
   }
 
+  private void createMissingCFsInMetaDuringUpgrade(
+      TableDescriptor metaDescriptor) throws IOException {
+    TableDescriptor newMetaDesc =
+      TableDescriptorBuilder.newBuilder(metaDescriptor)
+        .setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
+        .setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta())
+        .build();
+    long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc,
+      0, 0, false);
+    int tries = 30;
+    while (!(getMasterProcedureExecutor().isFinished(pid))
+        && getMasterProcedureExecutor().isRunning() && tries > 0) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        throw new IOException("Wait interrupted", e);
+      }
+      tries--;
+    }
+    if (tries <= 0) {
+      throw new HBaseIOException(
+        "Failed to add table and rep_barrier CFs to meta in the given time.");
+    } else {
+      Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
+      if (result != null && result.isFailed()) {
+        throw new IOException(
+          "Failed to add table and rep_barrier CFs to meta. "
+            + MasterProcedureUtil.unwrapRemoteIOException(result));
+      }
+    }
+  }
+
   /**
    * Check hbase:meta is up and ready for reading. For use during Master startup only.
    * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
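
createMissingCFsInMetaDuringUpgrade waits at most 30 seconds (30 tries, one second apart) for the modify-table procedure to finish before giving up. The same bounded-poll pattern in isolation, as a generic sketch (the helper name and signature are illustrative, not from this patch):

    import java.util.function.BooleanSupplier;

    final class WaitUtil { // illustrative helper, not part of the commit
      /** Polls condition every intervalMs until it holds or timeoutMs elapses. */
      static boolean waitFor(long timeoutMs, long intervalMs, BooleanSupplier condition)
          throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
          if (System.currentTimeMillis() >= deadline) {
            return false; // timed out
          }
          Thread.sleep(intervalMs);
        }
        return true;
      }
    }

With such a helper, the loop above would collapse to waitFor(30_000L, 1_000L, () -> getMasterProcedureExecutor().isFinished(pid)).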
FSTableDescriptors.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableInfoMissingException;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
 import org.apache.hadoop.hbase.client.TableDescriptor;
@@ -139,6 +140,31 @@ public static TableDescriptor tryUpdateAndGetMetaTableDescriptor(Configuration c
     }
   }
 
+  public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
+      final Configuration conf) {
+    return ColumnFamilyDescriptorBuilder
+      .newBuilder(HConstants.TABLE_FAMILY)
+      .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
+        HConstants.DEFAULT_HBASE_META_VERSIONS))
+      .setInMemory(true)
+      .setBlocksize(8 * 1024)
+      .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+      .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
+      .setBloomFilterType(BloomType.ROWCOL)
+      .build();
+  }
+
+  public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
+    return ColumnFamilyDescriptorBuilder
+      .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
+      .setMaxVersions(HConstants.ALL_VERSIONS)
+      .setInMemory(true)
+      .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+      .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
+      .setBloomFilterType(BloomType.ROWCOL)
+      .build();
+  }
+
   private static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
       throws IOException {
     // TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
@@ -155,23 +181,8 @@ private static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
         .setBloomFilterType(BloomType.ROWCOL)
         .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
         .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
-        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
-          HConstants.DEFAULT_HBASE_META_VERSIONS))
-        .setInMemory(true)
-        .setBlocksize(8 * 1024)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
-        .setBloomFilterType(BloomType.ROWCOL)
-        .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder
-        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
-        .setMaxVersions(HConstants.ALL_VERSIONS)
-        .setInMemory(true)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
-        .setBloomFilterType(BloomType.ROWCOL)
-        .build())
+      .setColumnFamily(getTableFamilyDescForMeta(conf))
+      .setColumnFamily(getReplBarrierFamilyDescForMeta())
       .setColumnFamily(ColumnFamilyDescriptorBuilder
         .newBuilder(HConstants.NAMESPACE_FAMILY)
         .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,

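After the forced restart completes the upgrade, hbase:meta should carry both new families. A test-style sketch of that check, assuming an open Connection (variable names here are illustrative):

    // Hedged verification sketch: confirm meta now has the upgraded families.
    try (Admin admin = connection.getAdmin()) {
      TableDescriptor meta = admin.getDescriptor(TableName.META_TABLE_NAME);
      assert meta.hasColumnFamily(HConstants.TABLE_FAMILY); // "table"
      assert meta.hasColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY); // "rep_barrier"
    }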