# Pod Kill Chaos Test For Kafka Version #942
name: Pod Kill Chaos Test For Kafka Version

on:
  # Manual trigger: the caller can choose which Milvus image is tested.
  workflow_dispatch:
    inputs:
      image_tag:
        description: The image tag to use for the chaos test
        required: true
        default: 'master-latest'
      image_repo:
        description: The image repo to use for the chaos test
        required: true
        default: 'milvusdb/milvus-dev'
  # Scheduled trigger: once a day at 17:30 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "30 17 * * *"
jobs:
  # For each matrix entry: bring up a kind cluster, install Chaos Mesh and a
  # Kafka-backed Milvus release, run the pod-kill chaos object for that
  # component, then check that Milvus is usable again afterwards.
  test-pod-kill-chaos:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    strategy:
      # One failing component must not cancel the other matrix entries.
      fail-fast: false
      matrix:
        pod:
          - allstandalone
          - allcluster
          - standalone
          - datacoord
          - datanode
          - indexcoord
          - indexnode
          - proxy
          - kafka
          - querycoord
          - querynode
          - rootcoord
          - etcd
          - minio
    steps:
      # Export RELEASE / IMAGE_REPO / IMAGE_TAG for all later steps. Scheduled
      # runs have no inputs, so the DEFAULT_* values are used as fallback.
      - name: Set env param
        env:
          DEFAULT_IMAGE_TAG: master-latest
          DEFAULT_IMAGE_REPO: milvusdb/milvus-dev
        run: |
          echo "RELEASE=test-${{ matrix.pod }}-pod-kill" >> $GITHUB_ENV
          echo "IMAGE_REPO=${{ github.event.inputs.image_repo || env.DEFAULT_IMAGE_REPO}}" >> $GITHUB_ENV
          echo "IMAGE_TAG=${{ github.event.inputs.image_tag || env.DEFAULT_IMAGE_TAG}}" >> $GITHUB_ENV

      - name: Creating kind cluster
        # NOTE(review): the action ref was garbled in the source
        # ("helm/[email protected]"); helm/kind-action@v1.2.0 is assumed - confirm.
        uses: helm/kind-action@v1.2.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      - uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is passed as a string, not parsed as a float.
          python-version: "3.8"

      - name: Install dependency
        uses: nick-invision/retry@v2
        with:
          timeout_minutes: 5
          max_attempts: 3
          retry_on: error
          shell: bash
          # --trusted-host expects a host (or host:port), not a URL.
          command: |
            pip install -r tests/python_client/requirements.txt --trusted-host test.pypi.org

      - name: Deploy Chaos Mesh
        # NOTE(review): this 2-minute step limit is shorter than helm's
        # --timeout 360s below, so the step limit fires first.
        timeout-minutes: 2
        shell: bash
        run: |
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          helm search repo chaos-mesh
          kubectl create ns chaos-testing
          helm install --wait --timeout 360s chaos-mesh chaos-mesh/chaos-mesh --namespace=chaos-testing --version v2.0.3 --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock
          kubectl get po -n chaos-testing

      - name: Deploy Milvus
        timeout-minutes: 15
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          # Informational only: the script output is printed, not consumed below.
          echo "latest tag:"
          bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus-dev -t master-latest -f master- -F -L -q
          helm repo add milvus https://milvus-io.github.io/milvus-helm
          helm repo update
          # Any matrix entry containing "standalone" uses the standalone values
          # file; every other entry is deployed with the cluster values file.
          if [[ ${{ matrix.pod }} == *"standalone"* ]]; then
            values_file=standalone-values.yaml
          else
            values_file=cluster-values.yaml
          fi
          helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus \
            --set pulsar.enabled=false \
            --set kafka.enabled=true \
            --set image.all.repository=${{ env.IMAGE_REPO }} \
            --set image.all.tag=${{ env.IMAGE_TAG }} \
            -f "${values_file}" \
            -n=chaos-testing
          kubectl get pods -n chaos-testing
          sleep 20s
          kubectl get pods -n chaos-testing
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          # check whether port-forward success
          nc -vz 127.0.0.1 19530
          # check whether milvus server is healthy
          python scripts/hello_milvus.py

      - name: Chaos Test
        timeout-minutes: 15
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          # replace chaos object
          sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/pod_kill\/'/g" constants.py
          sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_kill.yaml\'/g" constants.py
          sed -i "s/RELEASE_NAME =.*/RELEASE_NAME = \'${{ env.RELEASE }}\'/g" constants.py
          cat constants.py
          # The trailing `|| echo` keeps this step green on pytest failure or
          # timeout, so the analysis and recovery steps below still run.
          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "chaos test failed"

      - name: Result Analysis
        timeout-minutes: 1
        shell: bash
        working-directory: tests/python_client/chaos/reports
        run: |
          echo "result analysis"
          cat ${{ env.RELEASE }}.log || echo "no log file"

      - name: Wait all pods ready
        timeout-minutes: 5
        shell: bash
        working-directory: tests/python_client
        run: |
          kubectl get pod -n chaos-testing
          # wait all pod to be ready
          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl get pod -n chaos-testing
          # Restart the port-forward. `-r` and `|| true` keep the step alive when
          # no forwarder is left to kill (the shell runs with -e -o pipefail).
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9 || true
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          nc -vz 127.0.0.1 19530

      - name: Run e2e test after chaos
        timeout-minutes: 5
        shell: bash
        working-directory: tests/python_client
        run: |
          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no

      - name: Run hello_milvus after chaos
        timeout-minutes: 5
        shell: bash
        working-directory: tests/python_client
        run: |
          python chaos/scripts/hello_milvus.py --host 127.0.0.1

      - name: Verify all collections after chaos
        timeout-minutes: 15
        shell: bash
        working-directory: tests/python_client
        run: |
          python chaos/scripts/verify_all_collections.py --host 127.0.0.1

      - name: Export logs
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          #in this step, verify whether pod has been killed by pod's age
          kubectl get po -n chaos-testing
          # export k8s log for chaos mesh and milvus
          bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/chaos-test

      # Runs only when an earlier step failed: reinstall the release with the
      # same Kafka settings and values-file selection as the initial deploy.
      - name: Deploy Milvus Again If Previous E2E Test Failed
        timeout-minutes: 15
        if: ${{ failure() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          kubectl config set-context --current --namespace=chaos-testing
          bash scripts/uninstall_milvus.sh ${{ env.RELEASE }}
          if [[ ${{ matrix.pod }} == *"standalone"* ]]; then
            values_file=standalone-values.yaml
          else
            values_file=cluster-values.yaml
          fi
          helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus \
            --set pulsar.enabled=false \
            --set kafka.enabled=true \
            --set image.all.repository=${{ env.IMAGE_REPO }} \
            --set image.all.tag=${{ env.IMAGE_TAG }} \
            -f "${values_file}" \
            -n=chaos-testing
          kubectl get pods -n chaos-testing
          sleep 20s
          kubectl get pods -n chaos-testing
          # Tolerate "no forwarder running" (shell runs with -e -o pipefail).
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9 || true
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          # check whether port-forward success
          nc -vz 127.0.0.1 19530
          # check whether milvus server is healthy
          python scripts/hello_milvus.py

      - name: Data Consist Test
        timeout-minutes: 5
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO --capture=no

      - name: Milvus E2E Test
        timeout-minutes: 10
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client
        run: |
          kubectl get pod -n chaos-testing
          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl get pod -n chaos-testing
          # Tolerate "no forwarder running" (shell runs with -e -o pipefail).
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9 || true
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          nc -vz 127.0.0.1 19530
          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
          python chaos/scripts/hello_milvus.py --host 127.0.0.1

      - name: Export logs
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          #in this step, verify whether pod has been killed by pod's age
          kubectl get po -n chaos-testing
          # export k8s log for chaos mesh and milvus
          bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/data-consist-test
          bash ../../scripts/export_log_k8s.sh chaos-testing chaos-daemon k8s_logs/chaos-mesh-daemon

      # Artifacts are uploaded only when the job did not fully succeed.
      # upload-artifact v2 is deprecated and rejected by GitHub; v4 requires a
      # unique artifact name per job, which the matrix suffix already provides.
      - name: Upload logs
        if: ${{ ! success() }}
        uses: actions/upload-artifact@v4
        with:
          name: logs-${{ matrix.pod }}
          path: |
            tests/python_client/chaos/k8s_logs
            tests/python_client/chaos/reports