Skip to content

Commit

Permalink
Add test for autoscaling count
Browse files Browse the repository at this point in the history
[skip ci]

Signed-off-by: Viet Nguyen Duc <[email protected]>
  • Loading branch information
VietND96 committed Dec 1, 2024
1 parent 31fba3c commit d1ac580
Show file tree
Hide file tree
Showing 12 changed files with 293 additions and 50 deletions.
59 changes: 15 additions & 44 deletions .github/workflows/k8s-scaling-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ jobs:
matrix:
include:
- k8s-version: 'v1.31.2'
test-strategy: chart_test_autoscaling_job_count_chaos
cluster: 'minikube'
helm-version: 'v3.16.3'
docker-version: '27.3.1'
python-version: '3.13'
- k8s-version: 'v1.31.2'
test-strategy: chart_test_autoscaling_job_count_max_sessions
cluster: 'minikube'
helm-version: 'v3.16.3'
docker-version: '27.3.1'
python-version: '3.13'
- k8s-version: 'v1.31.2'
test-strategy: chart_test_autoscaling_job_count
cluster: 'minikube'
helm-version: 'v3.16.3'
docker-version: '27.3.1'
Expand Down Expand Up @@ -103,54 +116,12 @@ jobs:
timeout_minutes: 30
max_attempts: 3
command: |
NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_chaos
- name: Upload results
if: always()
uses: actions/upload-artifact@main
with:
name: chart_test_autoscaling_job_count_chaos
path: ./tests/tests/*.md
if-no-files-found: ignore
- name: Test Selenium Grid on Kubernetes with Autoscaling
uses: nick-invision/retry@master
with:
timeout_minutes: 30
max_attempts: 3
command: |
NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_max_sessions
- name: Upload results
if: always()
uses: actions/upload-artifact@main
with:
name: chart_test_autoscaling_job_count_max_sessions
path: ./tests/tests/*.md
if-no-files-found: ignore
- name: Test Selenium Grid on Kubernetes with Autoscaling
uses: nick-invision/retry@master
with:
timeout_minutes: 30
max_attempts: 3
command: |
NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_strategy_accurate
- name: Upload results
if: always()
uses: actions/upload-artifact@main
with:
name: chart_test_autoscaling_job_count_strategy_accurate
path: ./tests/tests/*.md
if-no-files-found: ignore
- name: Test Selenium Grid on Kubernetes with Autoscaling
uses: nick-invision/retry@master
with:
timeout_minutes: 30
max_attempts: 3
command: |
NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count
NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make ${{ matrix.test-strategy }}
- name: Upload results
if: always()
uses: actions/upload-artifact@main
with:
name: chart_test_autoscaling_job_count
name: ${{ matrix.test-strategy }}
path: ./tests/tests/*.md
if-no-files-found: ignore
- name: Cleanup Kubernetes cluster
Expand Down
32 changes: 31 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ SBOM_OUTPUT := $(or $(SBOM_OUTPUT),$(SBOM_OUTPUT),package_versions.txt)
KEDA_TAG_PREV_VERSION := $(or $(KEDA_TAG_PREV_VERSION),$(KEDA_TAG_PREV_VERSION),2.16.0-selenium-grid)
KEDA_TAG_VERSION := $(or $(KEDA_TAG_VERSION),$(KEDA_TAG_VERSION),2.16.0-selenium-grid)
KEDA_BASED_NAME := $(or $(KEDA_BASED_NAME),$(KEDA_BASED_NAME),ndviet)
KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241127)
KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241128)

all: hub \
distributor \
Expand Down Expand Up @@ -961,6 +961,36 @@ chart_test_autoscaling_playwright_connect_grid:
TEMPLATE_OUTPUT_FILENAME="k8s_playwright_connect_grid_basicAuth_secureIngress_ingressPublicIP_autoScaling_patchKEDA.yaml" \
./tests/charts/make/chart_test.sh JobAutoscaling

# Job-based autoscaling count test, chaos flavour: the test suite randomly
# creates and quits sessions to exercise scale-up AND scale-down paths.
# Use $(MAKE) (not `make`) so -j/-n and the jobserver propagate correctly.
chart_test_autoscaling_job_count_chaos:
	MATRIX_TESTS=AutoScalingTestsScaleChaos \
	$(MAKE) chart_test_autoscaling_job_count

# Job-based autoscaling count test with multiple concurrent sessions per
# browser node (2 each for Chrome/Firefox/Edge).
# Use $(MAKE) (not `make`) so -j/-n and the jobserver propagate correctly.
chart_test_autoscaling_job_count_max_sessions:
	MAX_SESSIONS_FIREFOX=2 MAX_SESSIONS_EDGE=2 MAX_SESSIONS_CHROME=2 \
	$(MAKE) chart_test_autoscaling_job_count

# Core job-based autoscaling count test. MATRIX_TESTS selects the Python test
# module run by tests/bootstrap.sh; SCALING_STRATEGY is forwarded to the chart.
# Defaults follow the file's $(or VAR,default) convention (no quotes/spaces —
# the shell would strip the quotes anyway, so the effective value is the same).
# Fixed: SELENIUM_GRID_PORT=80 was set twice on consecutive lines.
chart_test_autoscaling_job_count:
	MATRIX_TESTS=$(or $(MATRIX_TESTS),AutoscalingTestsScaleUp) SCALING_STRATEGY=$(or $(SCALING_STRATEGY),default) \
	PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \
	SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \
	VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \
	./tests/charts/make/chart_test.sh JobAutoscaling

# Deployment-based autoscaling count test, chaos flavour (random session churn).
# Use $(MAKE) (not `make`) so -j/-n and the jobserver propagate correctly.
chart_test_autoscaling_deployment_count_chaos:
	MATRIX_TESTS=AutoScalingTestsScaleChaos \
	$(MAKE) chart_test_autoscaling_deployment_count

# Deployment-based autoscaling count test with multiple sessions per node
# (3 Firefox, 2 Edge, 2 Chrome).
# Use $(MAKE) (not `make`) so -j/-n and the jobserver propagate correctly.
chart_test_autoscaling_deployment_count_max_sessions:
	MAX_SESSIONS_FIREFOX=3 MAX_SESSIONS_EDGE=2 MAX_SESSIONS_CHROME=2 \
	$(MAKE) chart_test_autoscaling_deployment_count

# Core deployment-based autoscaling count test (DeploymentAutoscaling chart
# values). MATRIX_TESTS selects the Python test module run by bootstrap.sh.
# Fixed: SELENIUM_GRID_PORT=80 was set twice on consecutive lines.
chart_test_autoscaling_deployment_count:
	MATRIX_TESTS=$(or $(MATRIX_TESTS),AutoscalingTestsScaleUp) \
	PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \
	SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \
	VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \
	./tests/charts/make/chart_test.sh DeploymentAutoscaling

chart_test_delete:
helm del test -n selenium || true
helm del selenium -n selenium || true
Expand Down
Empty file.
90 changes: 90 additions & 0 deletions tests/AutoscalingTests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import unittest
import random
import time
import subprocess
import signal
import concurrent.futures
import csv
import os
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.edge.options import Options as EdgeOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.remote.client_config import ClientConfig
from csv2md.table import Table

# Browser option templates, keyed by the name used when requesting a session.
BROWSER = {
    "chrome": ChromeOptions(),
    "firefox": FirefoxOptions(),
    "edge": EdgeOptions(),
}

# Shared Remote WebDriver client configuration. The grid is reached through a
# local ingress path; the long timeout accommodates slow pod scale-up.
# (Fix: dropped the needless `f` prefix — the literal has no placeholders.)
CLIENT_CONFIG = ClientConfig(
    remote_server_addr="http://localhost/selenium/wd/hub",
    keep_alive=True,
    timeout=3600,
)

# Column headers for the per-iteration scaling report (CSV, then Markdown).
FIELD_NAMES = ["Iteration", "New request sessions", "Requests accepted time", "Sessions failed", "New scaled pods", "Total sessions", "Total pods", "Gaps"]

def get_pod_count():
    """Return the number of Selenium node pods in ``Running`` state.

    Shells out to ``kubectl get pods -A --no-headers`` and counts lines that
    mention both ``selenium-node-`` and ``Running``.
    """
    listing = subprocess.run(
        ["kubectl", "get", "pods", "-A", "--no-headers"],
        capture_output=True,
        text=True,
    )
    running = 0
    for row in listing.stdout.splitlines():
        if "selenium-node-" in row and "Running" in row:
            running += 1
    return running

def create_session(browser_name):
    """Open a Remote WebDriver session for ``browser_name``.

    ``browser_name`` must be a key of ``BROWSER`` ("chrome", "firefox", "edge").
    """
    options = BROWSER[browser_name]
    return webdriver.Remote(
        command_executor=CLIENT_CONFIG.remote_server_addr,
        options=options,
        client_config=CLIENT_CONFIG,
    )

def wait_for_count_matches(sessions, timeout=10, interval=5):
    """Poll until the Running node-pod count equals ``len(sessions)``.

    Checks roughly every ``interval`` seconds for up to ``timeout`` seconds,
    then prints a PASS or WARN summary (it never raises on mismatch).

    Fix: ``pod_count`` is now initialized before the loop, so a non-positive
    ``timeout`` no longer raises UnboundLocalError in the final comparison.
    """
    elapsed = 0
    pod_count = get_pod_count()
    while pod_count != len(sessions) and elapsed < timeout:
        print(f"VALIDATING: Waiting for pods to match sessions... ({elapsed}/{timeout} seconds elapsed)")
        time.sleep(interval)
        elapsed += interval
        pod_count = get_pod_count()
    if pod_count != len(sessions):
        print(f"WARN: Mismatch between pod count and session count after {timeout} seconds. Gaps: {pod_count - len(sessions)}")
    else:
        print(f"PASS: Pod count matches session count after {elapsed} seconds.")

def close_all_sessions(sessions):
    """Quit every session in ``sessions`` and empty the list in place.

    Each ``quit()`` is individually guarded: teardown must be best-effort,
    and one dead/unreachable session should not leave the rest open
    (the original aborted on the first raising ``quit()``).

    Returns the now-empty list for caller convenience.
    """
    for session in sessions:
        try:
            session.quit()
        except Exception as e:
            print(f"WARN: Failed to quit session: {e}")
    sessions.clear()
    return sessions

def create_sessions_in_parallel(new_request_sessions):
    """Request ``new_request_sessions`` WebDriver sessions concurrently.

    Each session uses a randomly chosen browser from ``BROWSER``. Failures are
    counted and logged rather than raised; only successfully created sessions
    are returned.
    """
    failed_jobs = 0
    sessions = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = [
            executor.submit(create_session, random.choice(list(BROWSER.keys())))
            for _ in range(new_request_sessions)
        ]
        for done in concurrent.futures.as_completed(pending):
            try:
                sessions.append(done.result())
            except Exception as e:
                print(f"ERROR: Failed to create session: {e}")
                failed_jobs += 1
        print(f"Total failed jobs: {failed_jobs}")
    return sessions

def randomly_quit_sessions(sessions, sublist_size):
    """Quit up to ``sublist_size`` randomly chosen sessions from ``sessions``.

    Quit sessions are removed from the list in place; the (mutated) list is
    returned. An empty list is returned unchanged without logging.
    """
    if not sessions:
        return sessions
    victims = random.sample(sessions, min(sublist_size, len(sessions)))
    for victim in victims:
        victim.quit()
        sessions.remove(victim)
    print(f"QUIT: {len(victims)} sessions have been randomly quit.")
    return sessions

def export_results_to_csv(output_file, field_names, results):
    """Write ``results`` (a list of dicts keyed by ``field_names``) to CSV.

    The file is opened with ``newline=""`` as required by the ``csv`` module;
    without it, platforms that translate newlines produce blank rows between
    records.
    """
    with open(output_file, mode="w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        writer.writeheader()
        writer.writerows(results)

def export_results_csv_to_md(csv_file, md_file):
    """Render the CSV report at ``csv_file`` as a Markdown table in ``md_file``."""
    with open(csv_file) as source:
        parsed = Table.parse_csv(source)
    with open(md_file, mode="w") as sink:
        sink.write(parsed.markdown())
58 changes: 58 additions & 0 deletions tests/AutoscalingTests/test_scale_chaos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import unittest
import random
import time
import signal
import csv
from csv2md.table import Table
from .common import *

SESSIONS = []  # WebDriver sessions currently open across iterations
RESULTS = []   # one metrics row (dict keyed by FIELD_NAMES) per iteration

def signal_handler(signum, frame):
    """Best-effort cleanup: quit every open session when the run is killed."""
    print("Signal received, quitting all sessions...")
    close_all_sessions(SESSIONS)

# Release grid sessions on termination or Ctrl-C so pods can scale back down.
for _sig in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_sig, signal_handler)

class SeleniumAutoscalingTests(unittest.TestCase):
    """Chaos-style autoscaling test.

    Each of 10 iterations requests a random burst of sessions (2-15), records
    how the Running node-pod count reacts, then randomly quits up to 10
    sessions to also exercise scale-down. Metrics are exported to CSV and
    Markdown in a ``finally`` block so partial results survive failures.
    """

    def test_run_tests(self):
        try:
            for iteration in range(10):
                new_request_sessions = random.randint(2, 15)
                start_time = time.time()
                start_pods = get_pod_count()
                new_sessions = create_sessions_in_parallel(new_request_sessions)
                # Sessions the grid failed to start within this burst.
                failed_sessions = new_request_sessions - len(new_sessions)
                end_time = time.time()
                stop_pods = get_pod_count()
                SESSIONS.extend(new_sessions)
                elapsed_time = end_time - start_time
                new_scaled_pods = stop_pods - start_pods
                total_sessions = len(SESSIONS)
                total_pods = get_pod_count()
                RESULTS.append({
                    FIELD_NAMES[0]: iteration + 1,
                    FIELD_NAMES[1]: new_request_sessions,
                    FIELD_NAMES[2]: f"{elapsed_time:.2f} s",
                    FIELD_NAMES[3]: failed_sessions,
                    FIELD_NAMES[4]: new_scaled_pods,
                    FIELD_NAMES[5]: total_sessions,
                    FIELD_NAMES[6]: total_pods,
                    FIELD_NAMES[7]: total_pods - total_sessions,
                })
                print(f"ADDING: Created {new_request_sessions} new sessions in {elapsed_time:.2f} seconds.")
                print(f"INFO: Total sessions: {total_sessions}")
                print(f"INFO: Total pods: {total_pods}")
                # Chaos step: drop up to 10 random sessions every iteration.
                randomly_quit_sessions(SESSIONS, 10)
                time.sleep(15)
        finally:
            print(f"FINISH: Closing {len(SESSIONS)} sessions.")
            close_all_sessions(SESSIONS)
            # Fix: label the artifacts for THIS scenario; the original wrote
            # "scale_up_results_*", indistinguishable from test_scale_up's output.
            output_file = f"tests/scale_chaos_results_{random.randint(1, 10000)}"
            export_results_to_csv(f"{output_file}.csv", FIELD_NAMES, RESULTS)
            export_results_csv_to_md(f"{output_file}.csv", f"{output_file}.md")

if __name__ == "__main__":
    unittest.main()
59 changes: 59 additions & 0 deletions tests/AutoscalingTests/test_scale_up.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import unittest
import random
import time
import signal
import csv
from csv2md.table import Table
from .common import *

SESSIONS = []  # WebDriver sessions currently open across iterations
RESULTS = []   # one metrics row (dict keyed by FIELD_NAMES) per iteration

def signal_handler(signum, frame):
    """Best-effort cleanup: quit every open session when the run is killed."""
    print("Signal received, quitting all sessions...")
    close_all_sessions(SESSIONS)

# Release grid sessions on termination or Ctrl-C so pods can scale back down.
for _sig in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_sig, signal_handler)

class SeleniumAutoscalingTests(unittest.TestCase):
    """Scale-up autoscaling test.

    Each of 10 iterations requests a small random burst of sessions (1-3) and
    records how the Running node-pod count reacts; every 4th iteration quits a
    random batch of sessions. Metrics are exported to CSV and Markdown in a
    ``finally`` block so partial results survive failures.
    """

    def test_run_tests(self):
        try:
            for iteration in range(10):
                requested = random.randint(1, 3)
                started_at = time.time()
                pods_before = get_pod_count()
                created = create_sessions_in_parallel(requested)
                # Sessions the grid failed to start within this burst.
                not_created = requested - len(created)
                finished_at = time.time()
                pods_after = get_pod_count()
                SESSIONS.extend(created)
                duration = finished_at - started_at
                scaled = pods_after - pods_before
                session_total = len(SESSIONS)
                pod_total = get_pod_count()
                RESULTS.append(dict(zip(FIELD_NAMES, (
                    iteration + 1,
                    requested,
                    f"{duration:.2f} s",
                    not_created,
                    scaled,
                    session_total,
                    pod_total,
                    pod_total - session_total,
                ))))
                print(f"ADDING: Created {requested} new sessions in {duration:.2f} seconds.")
                print(f"INFO: Total sessions: {session_total}")
                print(f"INFO: Total pods: {pod_total}")
                # Periodically shed sessions so scale-down is also observed.
                if iteration % 4 == 0:
                    randomly_quit_sessions(SESSIONS, 15)
                time.sleep(15)
        finally:
            print(f"FINISH: Closing {len(SESSIONS)} sessions.")
            close_all_sessions(SESSIONS)
            report_base = f"tests/scale_up_results_{random.randint(1, 10000)}"
            export_results_to_csv(f"{report_base}.csv", FIELD_NAMES, RESULTS)
            export_results_csv_to_md(f"{report_base}.csv", f"{report_base}.md")

if __name__ == "__main__":
    unittest.main()
18 changes: 15 additions & 3 deletions tests/bootstrap.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env bash
set -o xtrace

MATRIX_TESTS=${MATRIX_TESTS:-"default"}

cd tests || true

if [ "${CI:-false}" = "false" ]; then
Expand All @@ -14,10 +18,18 @@ else
python3 -m pip install selenium==${BINDING_VERSION} | grep -v 'Requirement already satisfied'
fi

python3 -m pip install docker requests chardet | grep -v 'Requirement already satisfied'
python3 -m pip install -r requirements.txt | grep -v 'Requirement already satisfied'

python3 test.py $1
ret_code=$?
if [ "$1" = "AutoscalingTestsScaleUp" ]; then
python3 -m unittest AutoscalingTests.test_scale_up
ret_code=$?
elif [ "$1" = "AutoScalingTestsScaleChaos" ]; then
python3 -m unittest AutoscalingTests.test_scale_chaos
ret_code=$?
else
python3 test.py $1
ret_code=$?
fi

if [ "${CI:-false}" = "false" ]; then
deactivate
Expand Down
4 changes: 3 additions & 1 deletion tests/charts/ci/DeploymentAutoscaling-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ autoscaling:
maxReplicaCount: 4
pollingInterval: 10
scaledObjectOptions:
cooldownPeriod: 30
cooldownPeriod: ${AUTOSCALING_COOLDOWN_PERIOD}
terminationGracePeriodSeconds: 360

# Configuration for chrome nodes
Expand Down Expand Up @@ -47,6 +47,8 @@ chromeNode:
value: "1080"
- name: TZ
value: "Asia/Saigon"
- name: SE_NODE_SESSION_TIMEOUT
value: "3600"
readinessProbe:
enabled: &readinessProbe true
livenessProbe:
Expand Down
2 changes: 2 additions & 0 deletions tests/charts/ci/JobAutoscaling-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ chromeNode:
value: "1080"
- name: TZ
value: "Asia/Saigon"
- name: SE_NODE_SESSION_TIMEOUT
value: "3600"
readinessProbe:
enabled: &readinessProbe false
livenessProbe:
Expand Down
2 changes: 1 addition & 1 deletion tests/charts/ci/base-recorder-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"

videoRecorder:
enabled: true
enabled: ${ENABLE_VIDEO_RECORDER}
extraVolumes:
# - name: videos
# persistentVolumeClaim:
Expand Down
Loading

0 comments on commit d1ac580

Please sign in to comment.