Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Index benchmark metadata #346

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions utils/benchmark-operator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ remove_benchmark_operator() {
}

############################################################################
# Creates a benchmark and wait for it to complete
# Creates a benchmark, waits for it to complete and indexes benchmark metadata
# Arguments:
# Benchmark CR
# Timeout in seconds
############################################################################
run_benchmark() {
source ${ripsaw_tmp}/bin/activate
local start_date=$(date +%s%3N)
local rc=0
if ! ripsaw benchmark run -f ${1} -t ${2}; then
rc=1
Expand All @@ -66,6 +67,7 @@ run_benchmark() {
done
remove_cli
fi
deactivate
local benchmark_name=$(cat ${1} | python -c 'import yaml; import sys; print(yaml.safe_load(sys.stdin.read())["metadata"]["name"])')
gen_metadata ${benchmark_name} ${start_date} $(date +%s%3N)
return ${rc}
}
106 changes: 54 additions & 52 deletions utils/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ function check_pod_ready_state () {



##############################################################################
# Installs the oauth2client and gspread python packages and runs the
# csv_gen.py script located in the same directory as this file
# Arguments:
#   Sheet name prefix (a "-<timestamp>" suffix is appended, passed as --sheetname)
#   Value passed to csv_gen.py as -c
#   Value passed to csv_gen.py as --email
#   Value passed to csv_gen.py as --service-account
# Returns:
#   Exit status of the csv_gen.py invocation
##############################################################################
gen_spreadsheet_helper() {
  # The requirement specifier must be quoted: an unquoted ">=4.1.3" is parsed by
  # the shell as a redirection of stdout to a file named "=4.1.3", which drops
  # the version constraint and leaves a stray file in the working directory
  pip install "oauth2client>=4.1.3" gspread
  local script_dir
  script_dir=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
  python3 "${script_dir}/csv_gen.py" --sheetname "${1}-$(date "+%Y-%m-%dT%H:%M:%S")" -c "${2}" --email "${3}" --service-account "${4}"
}

##############################################################################
# Imports a CSV file into a google spreadsheet
# Arguments:
Expand All @@ -87,12 +92,6 @@ function check_pod_ready_state () {
# Gmail email address
# Service account file
##############################################################################

##############################################################################
# Installs the oauth2client and gspread python packages and runs the
# csv_gen.py script located in the same directory as this file
# Arguments:
#   Sheet name prefix (a "-<timestamp>" suffix is appended, passed as --sheetname)
#   Value passed to csv_gen.py as -c
#   Value passed to csv_gen.py as --email
#   Value passed to csv_gen.py as --service-account
# Returns:
#   Exit status of the csv_gen.py invocation
##############################################################################
gen_spreadsheet_helper() {
  # The requirement specifier must be quoted: an unquoted ">=4.1.3" is parsed by
  # the shell as a redirection of stdout to a file named "=4.1.3", which drops
  # the version constraint and leaves a stray file in the working directory
  pip install "oauth2client>=4.1.3" gspread
  local script_dir
  script_dir=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
  python3 "${script_dir}/csv_gen.py" --sheetname "${1}-$(date "+%Y-%m-%dT%H:%M:%S")" -c "${2}" --email "${3}" --service-account "${4}"
}

gen_spreadsheet() {
log "Installing requirements to generate spreadsheet"
if [[ "${VIRTUAL_ENV}" != "" ]]; then
Expand All @@ -116,46 +115,48 @@ gen_spreadsheet() {
# start_date (epoch)
# end_date (epoch)
##############################################################################

gen_metadata() {
BENCHMARK=$1
START_DATE=$2
END_DATE=$3

# construct all the required information
local VERSION_INFO=$(oc version -o json)
local INFRA_INFO=$(oc get infrastructure.config.openshift.io cluster -o json)
local PLATFORM=$(echo ${INFRA_INFO} | jq -r .spec.platformSpec.type)
if [[ ${PLATFORM} =~ "AWS" ]]; then
local CLUSTERTYPE=$(echo ${INFRA_INFO} | jq -r .status.platformStatus.aws.resourceTags[0].value)
fi
local CLUSTER_NAME=$(echo ${INFRA_INFO} | jq -r .status.infrastructureName)
local OCP_VERSION=$(echo ${VERSION_INFO} | jq -r .openshiftVersion)
local K8S_VERSION=$(echo ${VERSION_INFO} | jq -r .serverVersion.gitVersion)
local MASTER_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/master= --no-headers | wc -l)
local WORKER_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/worker= --no-headers | wc -l)
local INFRA_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/infra= --no-headers --ignore-not-found | wc -l)
local SDN_TYPE=$(oc get networks.operator.openshift.io cluster -o jsonpath="{.spec.defaultNetwork.type}")
if [[ ${PLATFORM} != "BareMetal" ]]; then
local MASTER_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/master= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
local WORKLOAD_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/workload= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
local WORKER_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/worker= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
if [[ ${INFRA_NODES} -gt 0 ]]; then
local INFRA_NODES_TYPE=$(oc get node --ignore-not-found -l node-role.kubernetes.io/infra= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
fi
fi
if [[ ${BENCHMARK} =~ "cyclictest" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/cyclictest= --no-headers --ignore-not-found | wc -l)
elif [[ $BENCHMARK =~ "oslat" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/oslat= --no-headers --ignore-not-found | wc -l)
elif [[ $BENCHMARK =~ "testpmd" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/testpmd= --no-headers --ignore-not-found | wc -l)
else
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/workload= --no-headers --ignore-not-found | wc -l)
fi
local TOTAL_NODES=$(oc get node --no-headers | wc -l)
local RESULT=$(oc get benchmark ${BENCHMARK} -o json | jq -r '.status.state')
local UUID=$(oc get benchmark ${BENCHMARK} -o json | jq -r '.status.uuid')
local BENCHMARK=$1
local START_DATE=$2
local END_DATE=$3

# construct all the required information
local VERSION_INFO=$(oc version -o json)
local INFRA_INFO=$(oc get infrastructure.config.openshift.io cluster -o json)
local PLATFORM=$(echo ${INFRA_INFO} | jq -r .spec.platformSpec.type)
if [[ ${PLATFORM} =~ "AWS" ]]; then
local CLUSTERTYPE=$(echo ${INFRA_INFO} | jq -r .status.platformStatus.aws.resourceTags[0].value)
fi
local CLUSTER_NAME=$(echo ${INFRA_INFO} | jq -r .status.infrastructureName)
local OCP_VERSION=$(echo ${VERSION_INFO} | jq -r .openshiftVersion)
local K8S_VERSION=$(echo ${VERSION_INFO} | jq -r .serverVersion.gitVersion)
local MASTER_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/master= --no-headers | wc -l)
local WORKER_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/worker= --no-headers | wc -l)
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/workload= --no-headers --ignore-not-found | wc -l)
local INFRA_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/infra= --no-headers --ignore-not-found | wc -l)
local SDN_TYPE=$(oc get networks.operator.openshift.io cluster -o jsonpath="{.spec.defaultNetwork.type}")
if [[ ${PLATFORM} != "BareMetal" ]]; then
local MASTER_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/master= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
local WORKER_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/worker= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
if [[ ${WORKLOAD_NODES_COUNT} -gt 0 ]]; then
local WORKLOAD_NODES_TYPE=$(oc get node -l node-role.kubernetes.io/workload= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
fi
if [[ ${INFRA_NODES_COUNT} -gt 0 ]]; then
local INFRA_NODES_TYPE=$(oc get node --ignore-not-found -l node-role.kubernetes.io/infra= --no-headers -o go-template='{{index (index .items 0).metadata.labels "beta.kubernetes.io/instance-type"}}')
fi
fi
if [[ ${BENCHMARK} =~ "cyclictest" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/cyclictest= --no-headers --ignore-not-found | wc -l)
elif [[ $BENCHMARK =~ "oslat" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/oslat= --no-headers --ignore-not-found | wc -l)
elif [[ $BENCHMARK =~ "testpmd" ]]; then
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/testpmd= --no-headers --ignore-not-found | wc -l)
else
local WORKLOAD_NODES_COUNT=$(oc get node -l node-role.kubernetes.io/workload= --no-headers --ignore-not-found | wc -l)
fi
local TOTAL_NODES=$(oc get node --no-headers | wc -l)
local RESULT=$(oc get benchmark ${BENCHMARK} -o json | jq -r '.status.state')
local UUID=$(oc get benchmark ${BENCHMARK} -o json | jq -r '.status.uuid')


# stupid indentation because bash won't find the closing EOF if it's not at the beginning of the line
Expand All @@ -170,22 +171,23 @@ local METADATA=$(cat << EOF
"worker_nodes_type":"${WORKER_NODES_TYPE}",
"infra_nodes_type":"${INFRA_NODES_TYPE}",
"workload_nodes_type":"${INFRA_NODES_TYPE}",
"master_nodes_count":"${MASTER_NODES_COUNT}",
"worker_nodes_count":"${WORKER_NODES_COUNT}",
"infra_nodes_count":"${INFRA_NODES_COUNT}",
"master_nodes_count":${MASTER_NODES_COUNT},
"worker_nodes_count":${WORKER_NODES_COUNT},
"infra_nodes_count":${INFRA_NODES_COUNT},
"workload_nodes_count":${WORKLOAD_NODES_COUNT},
"total_nodes":"${TOTAL_NODES}",
"total_nodes":${TOTAL_NODES},
"sdn_type":"${SDN_TYPE}",
"benchmark":"${BENCHMARK}",
"start_date":"${START_DATE}",
"timestamp":"${START_DATE}",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are we changing this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We generally use timestamp as the date field in our indexes, and Grafana is only able to handle a single time field (except for uperf, which uses uperf_ts; that's something we'll have to change to standardize).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ohh right yeah, this was added recently

"end_date":"${END_DATE}",
"result":"${RESULT}"
}
EOF
)

# send the document to ES
curl -X POST -H "Content-type: application/json" ${ES_SERVER}/${ES_INDEX}/_doc -d "${METADATA}"
# send the document to ES
log "Indexing benchmark metadata to ${ES_SERVER}/${ES_INDEX}"
curl -sS -X POST -H "Content-type: application/json" ${ES_SERVER}/${ES_INDEX}/_doc -d "${METADATA}"
}


3 changes: 1 addition & 2 deletions workloads/kube-burner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,11 @@ Workloads can be tweaked with the following environment variables:
| **PRELOAD_PERIOD** | How long the preload stage will last | 2m |
| **LOG_STREAMING** | Enable log streaming of kube-burner pod | true |
| **CLEANUP** | Delete old namespaces for the selected workload before starting benchmark | false |
| **CLEANUP_WHEN_FINISH** | Delete workload's namespaces after running it | false |
| **CLEANUP_WHEN_FINISH** | Delete benchmark objects and workload's namespaces after running it | false |
| **KUBE_BURNER_IMAGE** | Kube-burner container image | quay.io/cloud-bulldozer/kube-burner:v0.14.3 |
| **LOG_LEVEL** | Kube-burner log level | info |
| **PPROF_COLLECTION** | Collect and store pprof data locally | false |
| **PPROF_COLLECTION_INTERVAL** | Intervals for which pprof data will be collected | 5m |
| **TEST_CLEANUP** | Remove benchmark CR at the end | true |
| **POD_READY_THRESHOLD** | Pod ready latency threshold (only applies node-density and pod-density workloads). [More info](https://kube-burner.readthedocs.io/en/latest/measurements/#pod-latency-thresholds) | 5000ms |

**Note**: You can use basic authentication for ES indexing using the notation `http(s)://[username]:[password]@[host]:[port]` in **ES_SERVER**.
Expand Down
13 changes: 5 additions & 8 deletions workloads/kube-burner/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,10 @@ run_workload() {
rm -rf ${tmpdir}
log "Deploying benchmark"
set +e
local TMPCR=$(mktemp)
TMPCR=$(mktemp)
envsubst < $1 > ${TMPCR}
run_benchmark ${TMPCR} $((JOB_TIMEOUT + 600))
local rc=$?
if [[ ${TEST_CLEANUP} == "true" ]]; then
log "Cleaning up benchmark"
kubectl delete -f ${TMPCR}
kubectl delete configmap -n benchmark-operator kube-burner-cfg-${UUID}
fi
return ${rc}
rc=$?
}

label_nodes() {
Expand Down Expand Up @@ -142,6 +136,9 @@ check_running_benchmarks() {
}

cleanup() {
log "Cleaning up benchmark"
kubectl delete -f ${TMPCR}
kubectl delete configmap -n benchmark-operator kube-burner-cfg-${UUID}
if ! oc delete ns -l kube-burner-uuid=${UUID} --grace-period=600 --timeout=30m; then
rsevilla87 marked this conversation as resolved.
Show resolved Hide resolved
log "Namespaces cleanup failure"
rc=1
Expand Down
1 change: 0 additions & 1 deletion workloads/kube-burner/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ if [[ ${PPROF_COLLECTION} == "true" ]] ; then
get_pprof_secrets
fi
run_workload kube-burner-crd.yaml
rc=$?
if [[ ${WORKLOAD} == node-density* ]]; then
unlabel_nodes
fi
Expand Down
1 change: 1 addition & 0 deletions workloads/network-perf/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

TEST_CLEANUP=${TEST_CLEANUP:-true}
export ES_SERVER=${ES_SERVER:-https://search-perfscale-dev-chmf5l4sh66lvxbnadi4bznl3a.us-west-2.es.amazonaws.com:443}
export ES_INDEX=ripsaw-uperf-results
export METADATA_COLLECTION=${METADATA_COLLECTION:-true}
export METADATA_TARGETED=${METADATA_TARGETED:-true}

Expand Down
1 change: 1 addition & 0 deletions workloads/scale-perf/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
TEST_CLEANUP=${TEST_CLEANUP:-true}
export UUID=${UUID:-$(uuidgen)}
export ES_SERVER=${ES_SERVER:-https://search-perfscale-dev-chmf5l4sh66lvxbnadi4bznl3a.us-west-2.es.amazonaws.com:443}
export ES_INDEX=openshift-cluster-timings
export METADATA_COLLECTION=${METADATA_COLLECTION:-false}
export CLOUD_NAME=${CLOUD_NAME:-test_cloud}
if [[ -n $UUID ]]; then
Expand Down