Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DBNode] - Add support for passive replication between M3DB Clusters #1874

Merged
merged 21 commits into from
Aug 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ steps:
<<: *common
- name: "Integration (:docker:)"
command: make clean install-vendor docker-integration-test
parallelism: 2
env:
CGO_ENABLED: 0
GIMME_GO_VERSION: 1.12.x
Expand Down
8 changes: 1 addition & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,7 @@ docs-test:
.PHONY: docker-integration-test
docker-integration-test:
@echo "--- Running Docker integration test"
@./scripts/docker-integration-tests/setup.sh
@./scripts/docker-integration-tests/simple/test.sh
@./scripts/docker-integration-tests/cold_writes_simple/test.sh
@./scripts/docker-integration-tests/prometheus/test.sh
@./scripts/docker-integration-tests/carbon/test.sh
@./scripts/docker-integration-tests/aggregator/test.sh
@./scripts/docker-integration-tests/query_fanout/test.sh
./scripts/docker-integration-tests/run.sh

.PHONY: site-build
site-build:
Expand Down
34 changes: 12 additions & 22 deletions scripts/docker-integration-tests/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ function setup_single_m3db_node {
wait_for_db_init
}

function setup_three_m3db_nodes {
local dbnode_host_1=${DBNODE_HOST:-dbnode01}
local dbnode_host_2=${DBNODE_HOST:-dbnode02}
local dbnode_host_3=${DBNODE_HOST:-dbnode03}
function setup_two_m3db_nodes {
local dbnode_id_1=${DBNODE_ID_01:-m3db_local_1}
local dbnode_id_2=${DBNODE_ID_02:-m3db_local_2}
local dbnode_host_1=${DBNODE_HOST_01:-dbnode01}
local dbnode_host_2=${DBNODE_HOST_02:-dbnode02}
local dbnode_port=${DBNODE_PORT:-9000}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT:-9022}
local dbnode_host_3_health_port=${DBNODE_HEALTH_PORT:-9032}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT_01:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT_02:-9022}
local coordinator_port=${COORDINATOR_PORT:-7201}

echo "Wait for API to be available"
Expand All @@ -66,39 +66,31 @@ function setup_three_m3db_nodes {
"type": "cluster",
"namespaceName": "agg",
"retentionTime": "6h",
"num_shards": 3,
"replicationFactor": 3,
"num_shards": 2,
"replicationFactor": 2,
"hosts": [
{
"id": "m3db_local_1",
"id": "'"${dbnode_id_1}"'",
"isolation_group": "rack-a",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_1}"'",
"port": '"${dbnode_port}"'
},
{
"id": "m3db_local_2",
"id": "'"${dbnode_id_2}"'",
"isolation_group": "rack-b",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_2}"'",
"port": '"${dbnode_port}"'
},
{
"id": "m3db_local_3",
"isolation_group": "rack-c",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_3}"'",
"port": '"${dbnode_port}"'
}
]
}'

echo "Wait until placement is init'd"
ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.m3db_local_1.id)" == \"m3db_local_1\" ]'
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.'"${dbnode_id_1}"'.id)" == \"'"${dbnode_id_1}"'\" ]'

wait_for_namespaces

Expand All @@ -107,8 +99,6 @@ function setup_three_m3db_nodes {
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_1_health_port}"'/health | jq .bootstrapped)" == true ]'
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_2_health_port}"'/health | jq .bootstrapped)" == true ]'
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_3_health_port}"'/health | jq .bootstrapped)" == true ]'
}

function wait_for_db_init {
Expand Down
14 changes: 0 additions & 14 deletions scripts/docker-integration-tests/repair/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,6 @@ services:
- M3DB_HOST_ID=m3db_local_2
volumes:
- "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml"
dbnode03:
expose:
- "9000-9004"
- "2379-2380"
ports:
- "0.0.0.0:9032:9002"
- "0.0.0.0:9033:9003"
networks:
- backend
image: "m3dbnode_integration:${REVISION}"
environment:
- M3DB_HOST_ID=m3db_local_3
volumes:
- "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml"
coordinator01:
expose:
- "7201"
Expand Down
9 changes: 2 additions & 7 deletions scripts/docker-integration-tests/repair/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ export REVISION
echo "Run m3dbnode and m3coordinator containers"
docker-compose -f ${COMPOSE_FILE} up -d --renew-anon-volumes dbnode01
docker-compose -f ${COMPOSE_FILE} up -d --renew-anon-volumes dbnode02
docker-compose -f ${COMPOSE_FILE} up -d --renew-anon-volumes dbnode03
docker-compose -f ${COMPOSE_FILE} up -d --renew-anon-volumes coordinator01

# Think of this as a defer func() in golang
Expand All @@ -20,7 +19,7 @@ function defer {
}
trap defer EXIT

setup_three_m3db_nodes
setup_two_m3db_nodes

function write_data {
namespace=$1
Expand Down Expand Up @@ -76,11 +75,7 @@ write_data "coldWritesRepairAndNoIndex" "foo" "$(($(date +"%s") - 60 * 60 * 2))"
echo "Expect to read the data back from dbnode01"
read_all "coldWritesRepairAndNoIndex" "foo" 1 9012

# These two should eventually succeed once a repair detects the mismatch.
# This should eventually succeed once a repair detects the mismatch.
echo "Wait for the data to become available (via repairs) from dbnode02"
ATTEMPTS=30 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
read_all "coldWritesRepairAndNoIndex" "foo" 1 9022

echo "Wait for the data to become available (via repairs) from dbnode03"
ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
read_all "coldWritesRepairAndNoIndex" "foo" 1 9032
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# docker-compose topology for the passive-replication integration test:
# two independent M3DB clusters (A and B), each with two dbnodes and one
# coordinator. Host ports are offset per cluster so both can run side by side
# (cluster A: 9012/9022 + 7201; cluster B: 9112/9122 + 17201).
version: "3.5"
services:
  cluster_a_dbnode01:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9012:9002"
      - "0.0.0.0:9013:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_a_m3db_local_1
    volumes:
      - "./m3dbnode-cluster-a.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_a_dbnode02:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9022:9002"
      - "0.0.0.0:9023:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_a_m3db_local_2
    volumes:
      - "./m3dbnode-cluster-a.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_a_coordinator01:
    expose:
      - "7201"
      - "7203"
      - "7204"
    ports:
      - "0.0.0.0:7201:7201"
      - "0.0.0.0:7203:7203"
      - "0.0.0.0:7204:7204"
    networks:
      - backend
    image: "m3coordinator_integration:${REVISION}"
    volumes:
      # FIX: removed the stray trailing slash after the source path
      # ("m3coordinator-cluster-a.yml/") — with the slash Docker binds the
      # source as a directory, so the container would see a directory (not the
      # config file) at /etc/m3coordinator/m3coordinator.yml. Now matches the
      # cluster B coordinator mount below.
      - "./m3coordinator-cluster-a.yml:/etc/m3coordinator/m3coordinator.yml"
  cluster_b_dbnode01:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9112:9002"
      - "0.0.0.0:9113:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_b_m3db_local_1
    volumes:
      - "./m3dbnode-cluster-b.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_b_dbnode02:
    expose:
      - "9000-9004"
      - "2379-2380"
    ports:
      - "0.0.0.0:9122:9002"
      - "0.0.0.0:9123:9003"
    networks:
      - backend
    image: "m3dbnode_integration:${REVISION}"
    environment:
      - M3DB_HOST_ID=cluster_b_m3db_local_2
    volumes:
      - "./m3dbnode-cluster-b.yml:/etc/m3dbnode/m3dbnode.yml"
  cluster_b_coordinator01:
    expose:
      - "7201"
      - "7203"
      - "7204"
    ports:
      - "0.0.0.0:17201:7201"
      - "0.0.0.0:17203:7203"
      - "0.0.0.0:17204:7204"
    networks:
      - backend
    image: "m3coordinator_integration:${REVISION}"
    volumes:
      - "./m3coordinator-cluster-b.yml:/etc/m3coordinator/m3coordinator.yml"
networks:
  backend:
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# m3coordinator configuration for cluster A of the replication integration
# test. Identical to the cluster B config except that it targets cluster A's
# embedded etcd (cluster_a_dbnode01:2379).
listenAddress:
  type: "config"
  value: "0.0.0.0:7201"

logging:
  level: info

metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
  sanitization: prometheus
  samplingRate: 1.0
  extended: none

limits:
  perQuery:
    # keep query fan-out small — this is a test-only deployment
    maxFetchedSeries: 100

clusters:
  # Single M3DB cluster attachment with an aggregated and an unaggregated
  # namespace; names/retentions must match what the test scripts create.
  - namespaces:
      - namespace: agg
        type: aggregated
        retention: 10h
        resolution: 15s
      - namespace: unagg
        type: unaggregated
        retention: 10h
    client:
      config:
        service:
          env: default_env
          zone: embedded
          service: m3db
          cacheDir: /var/lib/m3kv
          etcdClusters:
            - zone: embedded
              endpoints:
                # etcd is embedded in cluster A's first dbnode
                - cluster_a_dbnode01:2379
      writeConsistencyLevel: majority
      readConsistencyLevel: unstrict_majority

tagOptions:
  idScheme: quoted
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
listenAddress:
type: "config"
value: "0.0.0.0:7201"

logging:
level: info

metrics:
scope:
prefix: "coordinator"
prometheus:
handlerPath: /metrics
listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
sanitization: prometheus
samplingRate: 1.0
extended: none

limits:
perQuery:
maxFetchedSeries: 100

clusters:
- namespaces:
- namespace: agg
type: aggregated
retention: 10h
resolution: 15s
- namespace: unagg
type: unaggregated
retention: 10h
client:
config:
service:
env: default_env
zone: embedded
service: m3db
cacheDir: /var/lib/m3kv
etcdClusters:
- zone: embedded
endpoints:
- cluster_b_dbnode01:2379
writeConsistencyLevel: majority
readConsistencyLevel: unstrict_majority

tagOptions:
idScheme: quoted
Loading