-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-25955 Setting NAMESPACES when adding a replication peer still requires scope definition at CF level #4052
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -290,12 +290,14 @@ public static ReplicationPeerConfig convert(ReplicationProtos.ReplicationPeer pe | |
peer.getTableCfsList().toArray(new ReplicationProtos.TableCF[peer.getTableCfsCount()])); | ||
if (tableCFsMap != null) { | ||
builder.setTableCFsMap(tableCFsMap); | ||
builder.setChainedFiltersOperation(peer.getChainOperator()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I do not think this is a good name. The chain filter is an internal implementation detail in HBase; in the future we could change the implementation to not use a filter at all. Maybe just name it overrideReplicationScope or something similar? |
||
} | ||
|
||
List<ByteString> namespacesList = peer.getNamespacesList(); | ||
if (namespacesList != null && namespacesList.size() != 0) { | ||
builder.setNamespaces( | ||
namespacesList.stream().map(ByteString::toStringUtf8).collect(Collectors.toSet())); | ||
builder.setChainedFiltersOperation(peer.getChainOperator()); | ||
} | ||
|
||
if (peer.hasBandwidth()) { | ||
|
@@ -357,12 +359,19 @@ public static ReplicationProtos.ReplicationPeer convert(ReplicationPeerConfig pe | |
for (int i = 0; i < tableCFs.length; i++) { | ||
builder.addTableCfs(tableCFs[i]); | ||
} | ||
if (peerConfig.getChainedFiltersOperator() != null) { | ||
builder.setChainOperator(peerConfig.getChainedFiltersOperator()); | ||
} | ||
|
||
} | ||
Set<String> namespaces = peerConfig.getNamespaces(); | ||
if (namespaces != null) { | ||
for (String namespace : namespaces) { | ||
builder.addNamespaces(ByteString.copyFromUtf8(namespace)); | ||
} | ||
if (peerConfig.getChainedFiltersOperator() != null) { | ||
builder.setChainOperator(peerConfig.getChainedFiltersOperator()); | ||
} | ||
} | ||
|
||
builder.setBandwidth(peerConfig.getBandwidth()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,9 @@ | |
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.function.Function; | ||
|
||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.hadoop.hbase.Cell; | ||
import org.apache.hadoop.hbase.HBaseInterfaceAudience; | ||
import org.apache.hadoop.hbase.regionserver.wal.WALUtil; | ||
|
@@ -33,9 +36,9 @@ | |
*/ | ||
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION) | ||
public class ChainWALEntryFilter implements WALEntryFilter { | ||
|
||
private final WALEntryFilter[] filters; | ||
private WALCellFilter[] cellFilters; | ||
private Operator operator = Operator.AND; | ||
|
||
public ChainWALEntryFilter(WALEntryFilter...filters) { | ||
this.filters = filters; | ||
|
@@ -56,6 +59,13 @@ public ChainWALEntryFilter(List<WALEntryFilter> filters) { | |
initCellFilters(); | ||
} | ||
|
||
public ChainWALEntryFilter(List<WALEntryFilter> filters, String operatorName) { | ||
this(filters); | ||
if (!StringUtils.isEmpty(operatorName)) { | ||
this.operator = Operator.valueOf(operatorName); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was trying to figure out the first place we read the String "operatorName" and make sure it fails gracefully. I know you have the client-side checking in Ruby code, and I suggested we have Java data validation. We should check it here as future-proofing. I think this happens early enough in the replication setup that the client would see a RemoteException flowing back to them? (not that their [comment truncated] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You mean, beyond the checks to avoid an NPE, adding an explicit extra check for the valid strings and throwing IllegalArgumentException, rather than letting the enum raise its own error? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, something so that we would at least throw back a well-formed exception (and not something that might be very terse/short). |
||
} | ||
} | ||
|
||
public void initCellFilters() { | ||
ArrayList<WALCellFilter> cellFilters = new ArrayList<>(filters.length); | ||
for (WALEntryFilter filter : filters) { | ||
|
@@ -68,7 +78,7 @@ public void initCellFilters() { | |
|
||
@Override | ||
public Entry filter(Entry entry) { | ||
entry = filterEntry(entry); | ||
entry = filterEntry(entry, operator.entryOp); | ||
if (entry == null) { | ||
return null; | ||
} | ||
|
@@ -77,30 +87,49 @@ public Entry filter(Entry entry) { | |
return entry; | ||
} | ||
|
||
protected Entry filterEntry(Entry entry) { | ||
protected Entry filterEntry(Entry entry, Function<Entry, Boolean> op) { | ||
Entry filteredEntry = null; | ||
for (WALEntryFilter filter : filters) { | ||
if (entry == null) { | ||
return null; | ||
filteredEntry = filter.filter(entry); | ||
if(op.apply(filteredEntry)){ | ||
return filteredEntry; | ||
} | ||
entry = filter.filter(entry); | ||
} | ||
return entry; | ||
return filteredEntry; | ||
} | ||
|
||
protected void filterCells(Entry entry) { | ||
if (entry == null || cellFilters.length == 0) { | ||
return; | ||
} | ||
WALUtil.filterCells(entry.getEdit(), c -> filterCell(entry, c)); | ||
WALUtil.filterCells(entry.getEdit(), c -> filterCell(entry, c, operator.cellOp)); | ||
} | ||
|
||
private Cell filterCell(Entry entry, Cell cell) { | ||
private Cell filterCell(Entry entry, Cell cell, Function<Cell, Boolean> op) { | ||
if (cell == null) { | ||
return null; | ||
} | ||
Cell filteredCell = null; | ||
for (WALCellFilter filter : cellFilters) { | ||
cell = filter.filterCell(entry, cell); | ||
if (cell == null) { | ||
break; | ||
filteredCell = filter.filterCell(entry, cell); | ||
if (op.apply(filteredCell)) { | ||
return filteredCell; | ||
} | ||
} | ||
return cell; | ||
return filteredCell; | ||
} | ||
|
||
/**
 * Boolean operator used when combining the results of the chained filters.
 * Each constant holds two predicates: one evaluated on the Entry returned by a filter and
 * one evaluated on the Cell returned by a cell filter. filterEntry and filterCell return a
 * filter's result as soon as the corresponding predicate is true for it.
 */
public enum Operator { | ||
// AND: both predicates are true when the filter result is null.
AND(e -> e == null, c -> c == null), | ||
// OR: both predicates are true when the filter result is non-null.
OR(e -> e != null, c -> c != null); | ||
|
||
// Predicate passed to filterEntry and applied to each filter's Entry result.
Function<Entry,Boolean> entryOp; | ||
// Predicate passed to filterCell and applied to each cell filter's Cell result.
Function<Cell,Boolean> cellOp; | ||
|
||
// Stores the entry-level and cell-level predicates for this constant.
Operator(Function<Entry, Boolean> entryOp, Function<Cell, Boolean> cellOp) { | ||
this.entryOp = entryOp; | ||
this.cellOp = cellOp; | ||
} | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,11 +34,14 @@ def help | |
to the peer cluster. | ||
An optional parameter for table column families identifies which tables and/or column families | ||
will be replicated to the peer cluster. | ||
An optional parameter for the boolean operator to be applied over different WAL Entry filters. If | ||
omitted, conjunction (AND) is applied. | ||
Comment on lines
+37
to
+38
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm wondering if, through the shell, we should provide some more simplicity for the operator. Shell users are unaware of any of the WALFilters that we are setting behind the scenes. To them, this operator would be nothing more than a "magic word" (e.g. "I put 'OR' and then my data gets replicated"). I guess it's better to get this code committed and then think about ways to make it more clear to admins. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed, this ended up too "programming oriented". Maybe we could change it to a more meaningful boolean property, such as "PASS_ONE_FILTER_ONLY"? |
||
An optional parameter for serial flag identifies whether or not the replication peer is a serial | ||
replication peer. The default serial flag is false. | ||
|
||
Note: Set a namespace in the peer config means that all tables in this namespace | ||
will be replicated to the peer cluster. So if you already have set a namespace in peer config, | ||
will be replicated to the peer cluster (If the 'OR' operator has been defined). | ||
So if you already have set a namespace in peer config, | ||
then you can't set this namespace's tables in the peer config again. | ||
|
||
Examples: | ||
|
@@ -50,6 +53,8 @@ def help | |
TABLE_CFS => { "table1" => [], "table2" => ["cf1"], "table3" => ["cf1", "cf2"] } | ||
hbase> add_peer '2', CLUSTER_KEY => "zk1,zk2,zk3:2182:/hbase-prod", | ||
NAMESPACES => ["ns1", "ns2", "ns3"] | ||
hbase> add_peer '2', CLUSTER_KEY => "zk1,zk2,zk3:2182:/hbase-prod", | ||
NAMESPACES => ["ns1", "ns2", "ns3"], OPERATOR => "OR" | ||
hbase> add_peer '2', CLUSTER_KEY => "zk1,zk2,zk3:2182:/hbase-prod", | ||
NAMESPACES => ["ns1", "ns2"], TABLE_CFS => { "ns3:table1" => [], "ns3:table2" => ["cf1"] } | ||
hbase> add_peer '3', CLUSTER_KEY => "zk1,zk2,zk3:2182:/hbase-prod", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should apply validation here, in the Java API that sets the chainOperator (in addition to, or instead of, the Ruby check), since a user could be writing Java code directly instead of using the Ruby shell to interact with HBase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could add some validation here, but none of the other existing fields do much validation either. An invalid value would fail the addPeer operation later, in the ChainWALEntryFilter constructor, as the enum "valueOf" call would raise an exception.