Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Feature/ci #241

Closed
wants to merge 13 commits into from
Closed
15 changes: 15 additions & 0 deletions .ci/integration-tests/aws-eks/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# T2 cluster definition: provisions a throwaway AWS EKS cluster that the CI
# pipeline uses to run the Druid operator integration tests.
apiVersion: t2.stackable.tech/v1
kind: Infra
# Infrastructure template to instantiate (the AWS EKS flavour).
template: aws-eks
metadata:
name: druid-operator-integration-tests
description: "Cluster for Druid Operator Integration Tests (AWS EKS)"
# No SSH public keys are injected into the cluster nodes.
publicKeys: []
spec:
region: "eu-central-1"
awsInstanceType: "t2.medium"
versions:
# NOTE(review): "_-operator" looks like a wildcard/placeholder key selecting
# the NIGHTLY build for all other operators — confirm against the T2 schema.
_-operator: NIGHTLY
# Presumably substituted by the CI pipeline before submission — verify.
druid-operator: "$DRUID_OPERATOR_VERSION"
node_count: 3
6 changes: 6 additions & 0 deletions .ci/integration-tests/aws-eks/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# CI entrypoint: clones the druid-operator repo at the branch under test and
# runs its integration test suite against the provisioned cluster.
git clone -b "$GIT_BRANCH" https://github.com/stackabletech/druid-operator.git
# Run the tests from inside the checkout; the subshell keeps the caller's cwd,
# and the && skips the test run if the checkout directory is missing.
(cd druid-operator/ && ./scripts/run_tests.sh --parallel 1)
# Capture the test result now, before the log collection below overwrites $?.
exit_code=$?
# Archive the operator logs for post-mortem debugging, pass or fail.
./operator-logs.sh druid > /target/druid-operator.log
# Propagate the test suite's exit status to the CI runner.
exit $exit_code
13 changes: 13 additions & 0 deletions .ci/integration-tests/azure-aks/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# T2 cluster definition: provisions a throwaway Azure AKS cluster that the CI
# pipeline uses to run the Druid operator integration tests.
apiVersion: t2.stackable.tech/v1
kind: Infra
# Infrastructure template to instantiate (the Azure AKS flavour).
template: azure-aks
metadata:
name: druid-operator-integration-tests
description: "Cluster for Druid Operator Integration Tests (Azure AKS)"
# No SSH public keys are injected into the cluster nodes.
publicKeys: []
spec:
versions:
# NOTE(review): "_-operator" looks like a wildcard/placeholder key selecting
# the NIGHTLY build for all other operators — confirm against the T2 schema.
_-operator: NIGHTLY
# Presumably substituted by the CI pipeline before submission — verify.
druid-operator: "$DRUID_OPERATOR_VERSION"
node_count: 3
6 changes: 6 additions & 0 deletions .ci/integration-tests/azure-aks/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# CI entrypoint: clones the druid-operator repo at the branch under test and
# runs its integration test suite against the provisioned cluster.
git clone -b "$GIT_BRANCH" https://github.com/stackabletech/druid-operator.git
# Run the tests from inside the checkout; the subshell keeps the caller's cwd,
# and the && skips the test run if the checkout directory is missing.
(cd druid-operator/ && ./scripts/run_tests.sh --parallel 1)
# Capture the test result now, before the log collection below overwrites $?.
exit_code=$?
# Archive the operator logs for post-mortem debugging, pass or fail.
./operator-logs.sh druid > /target/druid-operator.log
# Propagate the test suite's exit status to the CI runner.
exit $exit_code
19 changes: 19 additions & 0 deletions .ci/integration-tests/hcloud-centos-8/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
# T2 cluster definition: provisions a throwaway Hetzner Cloud / CentOS 8
# cluster that the CI pipeline uses to run the Druid operator integration tests.
apiVersion: t2.stackable.tech/v1
kind: Infra
# Infrastructure template to instantiate (Hetzner Cloud, CentOS 8 nodes).
template: hcloud-centos-8
metadata:
name: druid-operator-integration-tests
description: "Cluster for Druid Operator Integration Tests (Hetzner Cloud / CentOS 8)"
# Internal DNS domain for the test cluster.
domain: stackable.test
# No SSH public keys are injected into the cluster nodes.
publicKeys: []
spec:
# Hetzner location "hel1" (Helsinki).
location: "hel1"
# Presumably substituted by the CI pipeline so the same template can be
# exercised against multiple Kubernetes versions — verify.
k8sVersion: "$K8S_VERSION"
wireguard: false
versions:
# NOTE(review): "_-operator" looks like a wildcard/placeholder key selecting
# the NIGHTLY build for all other operators — confirm against the T2 schema.
_-operator: NIGHTLY
# Presumably substituted by the CI pipeline before submission — verify.
druid-operator: "$DRUID_OPERATOR_VERSION"
nodes:
main:
numberOfNodes: 3
6 changes: 6 additions & 0 deletions .ci/integration-tests/hcloud-centos-8/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# CI entrypoint: clones the druid-operator repo at the branch under test and
# runs its integration test suite against the provisioned cluster.
git clone -b "$GIT_BRANCH" https://github.com/stackabletech/druid-operator.git
# Run the tests from inside the checkout; the subshell keeps the caller's cwd,
# and the && skips the test run if the checkout directory is missing.
(cd druid-operator/ && ./scripts/run_tests.sh --parallel 1)
# Capture the test result now, before the log collection below overwrites $?.
exit_code=$?
# Archive the operator logs for post-mortem debugging, pass or fail.
./operator-logs.sh druid > /target/druid-operator.log
# Propagate the test suite's exit status to the CI runner.
exit $exit_code
15 changes: 15 additions & 0 deletions .ci/integration-tests/ionos-k8s/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# T2 cluster definition: provisions a throwaway IONOS managed-Kubernetes
# cluster that the CI pipeline uses to run the Druid operator integration tests.
apiVersion: t2.stackable.tech/v1
kind: Infra
# Infrastructure template to instantiate (IONOS Cloud managed K8s).
template: ionos-k8s
metadata:
name: druid-operator-integration-tests
description: "Cluster for Druid Operator Integration Tests (IONOS Cloud managed K8s)"
# Internal DNS domain for the test cluster.
domain: stackable.test
# No SSH public keys are injected into the cluster nodes.
publicKeys: []
spec:
# IONOS region: Germany / Frankfurt.
region: de/fra
versions:
# NOTE(review): "_-operator" looks like a wildcard/placeholder key selecting
# the NIGHTLY build for all other operators — confirm against the T2 schema.
_-operator: NIGHTLY
# Presumably substituted by the CI pipeline before submission — verify.
druid-operator: "$DRUID_OPERATOR_VERSION"
node_count: 3
6 changes: 6 additions & 0 deletions .ci/integration-tests/ionos-k8s/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# CI entrypoint: clones the druid-operator repo at the branch under test and
# runs its integration test suite against the provisioned cluster.
git clone -b "$GIT_BRANCH" https://github.com/stackabletech/druid-operator.git
# Run the tests from inside the checkout; the subshell keeps the caller's cwd,
# and the && skips the test run if the checkout directory is missing.
(cd druid-operator/ && ./scripts/run_tests.sh --parallel 1)
# Capture the test result now, before the log collection below overwrites $?.
exit_code=$?
# Archive the operator logs for post-mortem debugging, pass or fail.
./operator-logs.sh druid > /target/druid-operator.log
# Propagate the test suite's exit status to the CI runner.
exit $exit_code
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Added

- Readiness probe added ([#241])

### Changed

- BREAKING: The deep storage on s3 and the s3 config for ingestion have been changed to use the operator-rs commons::s3 structs ([#228])
Expand All @@ -12,6 +16,7 @@ All notable changes to this project will be documented in this file.

[#228]: https://github.com/stackabletech/druid-operator/pull/228
[#238]: https://github.com/stackabletech/druid-operator/pull/238
[#241]: https://github.com/stackabletech/druid-operator/pull/241

## [0.5.0] - 2022-03-15

Expand Down
19 changes: 16 additions & 3 deletions rust/operator-binary/src/druid_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@ use stackable_druid_crd::{
DS_BUCKET, JVM_CONFIG, LOG4J2_CONFIG, RUNTIME_PROPS, S3_ENDPOINT_URL, S3_SECRET_DIR_NAME,
ZOOKEEPER_CONNECTION_STRING,
};
use stackable_operator::commons::s3::S3ConnectionSpec;
use stackable_operator::{
builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder, PodBuilder, VolumeBuilder},
commons::opa::OpaApiVersion,
commons::{opa::OpaApiVersion, s3::S3ConnectionSpec},
k8s_openapi::{
api::{
apps::v1::{StatefulSet, StatefulSetSpec},
core::v1::{ConfigMap, EnvVar, Service, ServicePort, ServiceSpec},
core::v1::{
ConfigMap, EnvVar, Probe, Service, ServicePort, ServiceSpec, TCPSocketAction,
},
},
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
},
Expand Down Expand Up @@ -541,6 +542,18 @@ fn build_rolegroup_statefulset(
.build(),
);

// readiness probe
let probe = Probe {
tcp_socket: Some(TCPSocketAction {
port: IntOrString::Int(role.get_http_port().into()),
..Default::default()
}),
initial_delay_seconds: Some(30),
period_seconds: Some(5),
..Default::default()
};
cb.readiness_probe(probe);

let mut container = cb.build();
container.image_pull_policy = Some("IfNotPresent".to_string());
pb.add_container(container);
Expand Down
2 changes: 2 additions & 0 deletions tests/kuttl-test.yaml.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ testDirs:

startKIND: false
suppress: ["events"]
parallel: 1
reportFormat: json
6 changes: 6 additions & 0 deletions tests/templates/kuttl/authorizer/00-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,11 @@ kind: StatefulSet
metadata:
name: druid-zk-server-default
status:
availableReplicas: 1
readyReplicas: 1
replicas: 1
---
apiVersion: v1
kind: ConfigMap
metadata:
name: hdfs-znode
2 changes: 1 addition & 1 deletion tests/templates/kuttl/authorizer/02-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
name: druid-hdfs
timeout: 300
timeout: 600
---
apiVersion: apps/v1
kind: StatefulSet
Expand Down
2 changes: 1 addition & 1 deletion tests/templates/kuttl/authorizer/02-install-hdfs.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: kuttl.dev/v1beta1
kind: TestStep
metadata:
name: druid-hdfs
timeout: 180
timeout: 600
---
apiVersion: hdfs.stackable.tech/v1alpha1
kind: HdfsCluster
Expand Down
1 change: 1 addition & 0 deletions tests/templates/kuttl/authorizer/06-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ metadata:
name: install-healthcheck-files
commands:
- script: kubectl exec -n $NAMESPACE authcheck-0 -- python /tmp/authcheck.py
timeout: 600
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
timeout: 600
commands:
- script: kubectl cp -n $NAMESPACE ./authcheck.py authcheck-0:/tmp
- script: kubectl cp -n $NAMESPACE ./requirements.txt authcheck-0:/tmp
Expand Down
6 changes: 6 additions & 0 deletions tests/templates/kuttl/ingestion-no-s3-ext/00-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,11 @@ kind: StatefulSet
metadata:
name: druid-zk-server-default
status:
availableReplicas: 1
readyReplicas: 1
replicas: 1
---
apiVersion: v1
kind: ConfigMap
metadata:
name: hdfs-znode
2 changes: 1 addition & 1 deletion tests/templates/kuttl/ingestion-no-s3-ext/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
name: druid-hdfs
timeout: 300
timeout: 600
---
apiVersion: apps/v1
kind: StatefulSet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: kuttl.dev/v1beta1
kind: TestStep
metadata:
name: druid-hdfs
timeout: 180
timeout: 600
---
apiVersion: hdfs.stackable.tech/v1alpha1
kind: HdfsCluster
Expand Down
1 change: 1 addition & 0 deletions tests/templates/kuttl/ingestion-no-s3-ext/04-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ metadata:
name: install-healthcheck-files
commands:
- script: kubectl exec -n $NAMESPACE checks-0 -- python /tmp/healthcheck.py
timeout: 300
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
timeout: 600
commands:
- script: kubectl cp -n $NAMESPACE ./healthcheck.py checks-0:/tmp
- script: kubectl cp -n $NAMESPACE ./requirements.txt checks-0:/tmp
Expand Down
1 change: 1 addition & 0 deletions tests/templates/kuttl/ingestion-no-s3-ext/05-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ metadata:
name: ingestion-check
commands:
- script: kubectl exec -n $NAMESPACE checks-0 -- python /tmp/ingestioncheck.py derby-druid
timeout: 300
40 changes: 35 additions & 5 deletions tests/templates/kuttl/ingestion-no-s3-ext/healthcheck.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import requests
import sys
import logging
import time

if __name__ == "__main__":
result = 0
Expand All @@ -26,9 +27,38 @@

for role in druid_roles:
url = f"http://{druid_cluster_name}-{role}-default:{druid_ports[role]}/status/health"
res = requests.get(url)
if res.status_code != 200 or res.text.lower() != "true":
result = 1
break
count = 1

sys.exit(result)
# As this script is intended to be executed by Kuttl which is in charge of overall test timeouts it is ok
# to loop infinitely here - or until all tests succeed
# The script iterates over all known ports and services and checks that the ports are available
# The timeout for this connection attempt is configured to 5 seconds, to ensure frequent retries that are
# not handled internally by the requests library, because it was unclear when or if dns entries are cached
# internally during retry handling.
# By issuing a new call to .get() we are trying to ensure a new dns lookup for the target.
#
# Any errors are logged and retried until either the test succeeds or Kuttl kills this script due to
# the timeout.
while True:
try:
count = count + 1
print(f"Checking role [{role}] on url [{url}]")
res = requests.get(url, timeout=5)
code = res.status_code
if res.status_code == 200 and res.text.lower() == "true":
break
else:
print(f"Got non 200 status code [{res.status_code}] or non-true response [{res.text.lower()}], retrying attempt no [{count}] ....")
except requests.exceptions.Timeout:
print(f"Connection timed out, retrying attempt no [{count}] ....")
except requests.ConnectionError as e:
print(f"Connection Error: {str(e)}")
except requests.RequestException as e:
print(f"General Error: {str(e)}")
except Exception:
print(f"Unhandled error occurred, retrying attempt no [{count}] ....")

# Wait a little bit before retrying
time.sleep(1)

sys.exit(0)
38 changes: 35 additions & 3 deletions tests/templates/kuttl/ingestion-no-s3-ext/ingestioncheck.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import urllib

import requests
import http
import sys
Expand All @@ -11,6 +13,11 @@ def __init__(self):
self.session.headers.update({'Accept': 'application/json', 'Content-Type': 'application/json'})
http.client.HTTPConnection.debuglevel = 1

def get(self, url):
response = self.session.get(url)
assert response.status_code == 200
return response.text

def get_tasks(self, url):
response = self.session.get(url)
assert response.status_code == 200
Expand All @@ -21,6 +28,10 @@ def post_task(self, url, input):
assert response.status_code == 200
return response.text

def check_rc(self, url):
response = self.session.get(url)
return response.status_code

def query_datasource(self, url, sql, expected, iterations):
loop = 0
while True:
Expand Down Expand Up @@ -53,7 +64,9 @@ def query_datasource(self, url, sql, expected, iterations):
url=f"http://{druid_cluster_name}-coordinator-default:8081/druid/indexer/v1/task",
input='/tmp/druid-quickstartimport.json'
)

task_id = json.loads(ingestion)["task"]
url_encoded_taskid = urllib.parse.quote(task_id, safe='')
print(f"TASKID: [{task_id}]")
print('''
Re-query tasks
==============''')
Expand All @@ -66,9 +79,28 @@ def query_datasource(self, url, sql, expected, iterations):
assert new_task_count == task_count + 1

print('''
Wait for ingestion task and datasource
Wait for ingestion task to succeed
======================================''')
job_finished = False
while not job_finished:
time.sleep(5)
task = druid.get(
url=f"http://{druid_cluster_name}-coordinator-default:8081/druid/indexer/v1/task/{url_encoded_taskid}/status",
)
task_status = json.loads(task)["status"]["statusCode"]
print(f"Current task status: [{task_status}]")
assert task_status == "RUNNING" or task_status == "SUCCESS", f"Taskstatus not running or succeeeded: {task_status}"
job_finished = task_status == "SUCCESS"

print('''
Wait for broker to indicate all segments are fully online
======================================''')
time.sleep(30)
broker_ready = False
while not broker_ready:
time.sleep(2)
broker_ready_rc = druid.check_rc(f"http://{druid_cluster_name}-broker-default:8082/druid/broker/v1/readiness")
broker_ready = broker_ready_rc == 200
print(f"Broker respondend with [{broker_ready_rc}] to readiness check")

print('''
Datasource SQL
Expand Down
6 changes: 6 additions & 0 deletions tests/templates/kuttl/ingestion-s3-ext/00-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,11 @@ kind: StatefulSet
metadata:
name: druid-zk-server-default
status:
availableReplicas: 1
readyReplicas: 1
replicas: 1
---
apiVersion: v1
kind: ConfigMap
metadata:
name: hdfs-znode
2 changes: 1 addition & 1 deletion tests/templates/kuttl/ingestion-s3-ext/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
name: druid-hdfs
timeout: 300
timeout: 600
---
apiVersion: apps/v1
kind: StatefulSet
Expand Down
Loading