Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DBNode] - Add support for passive replication between M3DB Clusters #1874

Merged
merged 21 commits into from
Aug 14, 2019
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ docker-integration-test:
@./scripts/docker-integration-tests/carbon/test.sh
@./scripts/docker-integration-tests/aggregator/test.sh
@./scripts/docker-integration-tests/query_fanout/test.sh
@./scripts/docker-integration-tests/repair/test.sh
@./scripts/docker-integration-tests/replication/test.sh
@./scripts/docker-integration-tests/repair_and_replication/test.sh

.PHONY: site-build
site-build:
Expand Down
23 changes: 13 additions & 10 deletions scripts/docker-integration-tests/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,16 @@ function setup_single_m3db_node {
}

function setup_three_m3db_nodes {
local dbnode_host_1=${DBNODE_HOST:-dbnode01}
local dbnode_host_2=${DBNODE_HOST:-dbnode02}
local dbnode_host_3=${DBNODE_HOST:-dbnode03}
local dbnode_id_1=${DBNODE_ID_01:-m3db_local_1}
local dbnode_id_2=${DBNODE_ID_02:-m3db_local_2}
local dbnode_id_3=${DBNODE_ID_03:-m3db_local_3}
local dbnode_host_1=${DBNODE_HOST_01:-dbnode01}
local dbnode_host_2=${DBNODE_HOST_02:-dbnode02}
local dbnode_host_3=${DBNODE_HOST_03:-dbnode03}
local dbnode_port=${DBNODE_PORT:-9000}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT:-9022}
local dbnode_host_3_health_port=${DBNODE_HEALTH_PORT:-9032}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT_01:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT_02:-9022}
local dbnode_host_3_health_port=${DBNODE_HEALTH_PORT_03:-9032}
local coordinator_port=${COORDINATOR_PORT:-7201}

echo "Wait for API to be available"
Expand All @@ -70,23 +73,23 @@ function setup_three_m3db_nodes {
"replicationFactor": 3,
"hosts": [
{
"id": "m3db_local_1",
"id": "'"${dbnode_id_1}"'",
"isolation_group": "rack-a",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_1}"'",
"port": '"${dbnode_port}"'
},
{
"id": "m3db_local_2",
"id": "'"${dbnode_id_2}"'",
"isolation_group": "rack-b",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_2}"'",
"port": '"${dbnode_port}"'
},
{
"id": "m3db_local_3",
"id": "'"${dbnode_id_3}"'",
"isolation_group": "rack-c",
"zone": "embedded",
"weight": 1024,
Expand All @@ -98,7 +101,7 @@ function setup_three_m3db_nodes {

echo "Wait until placement is init'd"
ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.m3db_local_1.id)" == \"m3db_local_1\" ]'
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.'"${dbnode_id_1}"'.id)" == \"'"${dbnode_id_1}"'\" ]'

wait_for_namespaces

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Docker Compose topology for the integration test: two independent
# three-node M3DB clusters ("a" and "b"), each with its own coordinator,
# all attached to the shared "backend" network.
#
# Every dbnode publishes container ports 9002 and 9003 on a distinct host
# port pair so the nodes can be reached individually from the host.
version: "3.5"
services:
  # ---- Cluster A ----------------------------------------------------------
  cluster_a_dbnode01:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9012:9002"
      - "0.0.0.0:9013:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_a_m3db_local_1
    volumes:
      - "./m3dbnode-cluster-a.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_a_dbnode02:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9022:9002"
      - "0.0.0.0:9023:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_a_m3db_local_2
    volumes:
      - "./m3dbnode-cluster-a.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_a_dbnode03:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9032:9002"
      - "0.0.0.0:9033:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_a_m3db_local_3
    volumes:
      - "./m3dbnode-cluster-a.yml:/etc/m3dbnode/m3dbnode.yml"
  # Cluster A's coordinator is published on the same host ports as its
  # container ports (7201/7203/7204).
  cluster_a_coordinator01:
    expose:
      - "7201"
      - "7203"
      - "7204"
    ports:
      - "0.0.0.0:7201:7201"
      - "0.0.0.0:7203:7203"
      - "0.0.0.0:7204:7204"
    networks:
      - backend
    image: "m3coordinator_integration:${REVISION}"
    volumes:
      # The host path is a file, so it carries no trailing slash (matches
      # the cluster B coordinator mount below).
      - "./m3coordinator-cluster-a.yml:/etc/m3coordinator/m3coordinator.yml"
  # ---- Cluster B ----------------------------------------------------------
  # Same layout as cluster A; host ports are offset (91xx instead of 90xx)
  # so both clusters can be published side by side.
  cluster_b_dbnode01:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9112:9002"
      - "0.0.0.0:9113:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_b_m3db_local_1
    volumes:
      - "./m3dbnode-cluster-b.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_b_dbnode02:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9122:9002"
      - "0.0.0.0:9123:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_b_m3db_local_2
    volumes:
      - "./m3dbnode-cluster-b.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_b_dbnode03:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9132:9002"
      - "0.0.0.0:9133:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_b_m3db_local_3
    volumes:
      - "./m3dbnode-cluster-b.yml:/etc/m3dbnode/m3dbnode.yml"
  # Cluster B's coordinator is published on 172xx host ports to avoid
  # clashing with cluster A's coordinator.
  cluster_b_coordinator01:
    expose:
      - "7201"
      - "7203"
      - "7204"
    ports:
      - "0.0.0.0:17201:7201"
      - "0.0.0.0:17203:7203"
      - "0.0.0.0:17204:7204"
    networks:
      - backend
    image: "m3coordinator_integration:${REVISION}"
    volumes:
      - "./m3coordinator-cluster-b.yml:/etc/m3coordinator/m3coordinator.yml"
networks:
  # Declared with no options, so Compose creates it with default settings.
  backend:
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# m3coordinator configuration for cluster A in the integration test.

# Address the coordinator binds to, supplied directly in this file.
listenAddress:
  type: "config"
  value: "0.0.0.0:7201"

logging:
  level: info

metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
  sanitization: prometheus
  samplingRate: 1.0
  extended: none

limits:
  perQuery:
    maxFetchedSeries: 100

# The single M3DB cluster this coordinator talks to, with one aggregated
# and one unaggregated namespace.
clusters:
  - namespaces:
      - namespace: agg
        type: aggregated
        retention: 10h
        resolution: 15s
      - namespace: unagg
        type: unaggregated
        retention: 10h
    client:
      config:
        service:
          env: default_env
          zone: embedded
          service: m3db
          cacheDir: /var/lib/m3kv
          # etcd is reached through cluster A's first dbnode.
          etcdClusters:
            - zone: embedded
              endpoints:
                - cluster_a_dbnode01:2379
      writeConsistencyLevel: majority
      readConsistencyLevel: unstrict_majority

tagOptions:
  idScheme: quoted
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# m3coordinator configuration for cluster B in the integration test.

# Address the coordinator binds to, supplied directly in this file.
listenAddress:
  type: "config"
  value: "0.0.0.0:7201"

logging:
  level: info

metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
  sanitization: prometheus
  samplingRate: 1.0
  extended: none

limits:
  perQuery:
    maxFetchedSeries: 100

# The single M3DB cluster this coordinator talks to, with one aggregated
# and one unaggregated namespace.
clusters:
  - namespaces:
      - namespace: agg
        type: aggregated
        retention: 10h
        resolution: 15s
      - namespace: unagg
        type: unaggregated
        retention: 10h
    client:
      config:
        service:
          env: default_env
          zone: embedded
          service: m3db
          cacheDir: /var/lib/m3kv
          # etcd is reached through cluster B's first dbnode.
          etcdClusters:
            - zone: embedded
              endpoints:
                - cluster_b_dbnode01:2379
      writeConsistencyLevel: majority
      readConsistencyLevel: unstrict_majority

tagOptions:
  idScheme: quoted
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# m3dbnode configuration shared by the cluster A nodes in the integration
# test. Repairs are enabled within the cluster, and cluster B is configured
# as a replication source.
db:
  logging:
    level: info

  tracing:
    backend: jaeger
    jaeger:
      reporter:
        localAgentHostPort: jaeger:6831
      sampler:
        type: const
        param: 1

  metrics:
    prometheus:
      handlerPath: /metrics
    sanitization: prometheus
    samplingRate: 1.0
    extended: detailed

  listenAddress: 0.0.0.0:9000
  clusterListenAddress: 0.0.0.0:9001
  httpNodeListenAddress: 0.0.0.0:9002
  httpClusterListenAddress: 0.0.0.0:9003
  debugListenAddress: 0.0.0.0:9004

  # The host ID is read from the M3DB_HOST_ID environment variable, so the
  # same file can be used by every node in the cluster.
  hostID:
    resolver: environment
    envVarName: M3DB_HOST_ID

  client:
    writeConsistencyLevel: majority
    readConsistencyLevel: unstrict_majority

  gcPercentage: 100

  writeNewSeriesAsync: true
  writeNewSeriesLimitPerSecond: 1048576
  writeNewSeriesBackoffDuration: 2ms

  bootstrap:
    # Intentionally disable peers bootstrapper to ensure it doesn't interfere with test.
    bootstrappers:
      - filesystem
      - commitlog
      - uninitialized_topology
    commitlog:
      returnUnfulfilledForCorruptCommitLogFiles: false

  cache:
    series:
      policy: lru
    postingsList:
      size: 262144

  commitlog:
    flushMaxBytes: 524288
    flushEvery: 1s
    queue:
      calculationType: fixed
      size: 2097152

  fs:
    filePathPrefix: /var/lib/m3db

  # etcd settings for this cluster: clients connect to the first dbnode,
  # which is also listed as the only seed node.
  config:
    service:
      env: default_env
      zone: embedded
      service: m3db
      cacheDir: /var/lib/m3kv
      etcdClusters:
        - zone: embedded
          endpoints:
            - cluster_a_dbnode01:2379
    seedNodes:
      initialCluster:
        - hostID: cluster_a_m3db_local_1
          endpoint: http://cluster_a_dbnode01:2380

  # Enable repairs (within cluster a).
  repair:
    enabled: true
    throttle: 1ms
    checkInterval: 1ms

  # Enable replication (from cluster b).
  replication:
    clusters:
      - name: "cluster-b"
        repairEnabled: true
        client:
          config:
            service:
              env: default_env
              zone: embedded
              service: m3db
              cacheDir: /var/lib/m3kv
              etcdClusters:
                - zone: embedded
                  endpoints:
                    - cluster_b_dbnode01:2379

Loading