Skip to content

Commit

Permalink
tracing: Add test to verify tracing works correctly
Browse files Browse the repository at this point in the history
This tracing test runs as follows:
1. Enables tracing on runtime and shim components.
2. Runs a Jaeger container that will receive the trace spans.
3. A kata-container is executed.
4. Verifies that Jaeger has collected a number of spans.
5. Disables tracing.

Fixes: kata-containers#1255.

Signed-off-by: Salvador Fuentes <[email protected]>
  • Loading branch information
chavafg committed Mar 8, 2019
1 parent 3c02251 commit f75074b
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 5 deletions.
43 changes: 43 additions & 0 deletions .ci/configure_tracing_for_kata.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
#
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

set -o errexit
set -o nounset
set -o pipefail

cidir=$(dirname "$0")
source "${cidir}/lib.sh"

[ "$#" -eq 1 ] || die "Specify enable or disable"

default_kata_cfg_file="/usr/share/defaults/kata-containers/configuration.toml"
kata_cfg_file=${kata_cfg_file:-$default_kata_cfg_file}

enable_tracing() {
sudo crudini --set "$kata_cfg_file" shim.kata enable_tracing true
sudo crudini --set "$kata_cfg_file" runtime enable_tracing true
sudo crudini --set "$kata_cfg_file" runtime internetworking_model none
sudo crudini --set "$kata_cfg_file" runtime disable_new_netns true
}

disable_tracing() {
sudo crudini --set "$kata_cfg_file" shim.kata enable_tracing false
sudo crudini --set "$kata_cfg_file" runtime enable_tracing false
sudo crudini --set "$kata_cfg_file" runtime internetworking_model macvtap
sudo crudini --set "$kata_cfg_file" runtime disable_new_netns false
}

main() {
cmd="$1"
case "$cmd" in
enable ) enable_tracing ;;
disable ) disable_tracing ;;
*) die "invalid command: '$cmd'" ;;
esac
}

main "$@"
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
TIMEOUT := 60

# union for 'make test'
UNION := functional docker crio docker-compose network netmon docker-stability oci openshift kubernetes swarm vm-factory entropy ramdisk shimv2
UNION := functional docker crio docker-compose network netmon \
docker-stability oci openshift kubernetes swarm vm-factory \
entropy ramdisk shimv2 tracing

# skipped test suites for docker integration tests
FILTER_FILE = .ci/hypervisors/$(KATA_HYPERVISOR)/filter_docker_$(KATA_HYPERVISOR).sh
Expand Down Expand Up @@ -143,6 +145,9 @@ netmon:
bash -f .ci/install_bats.sh
bats integration/netmon/netmon_test.bats

tracing:
bash tracing/tracing-test.sh

test: ${UNION}

check: checkcommits log-parser
Expand All @@ -168,4 +173,5 @@ check: checkcommits log-parser
network \
ramdisk \
test \
tracing \
vm-factory
14 changes: 10 additions & 4 deletions lib/common.bash
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
# Copyright (c) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -12,14 +12,20 @@ VC_POD_DIR="${VC_POD_DIR:-/var/lib/vc/sbs}"

KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"

die(){
msg="$*"
die() {
local msg="$*"
echo "ERROR: $msg" >&2
exit 1
}

warn() {
local msg="$*"
echo "WARNING: $msg"
}

info() {
echo -e "INFO: $*"
local msg="$*"
echo "INFO: $msg"
}

# Check if the $1 argument is the name of a 'known'
Expand Down
242 changes: 242 additions & 0 deletions tracing/tracing-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/bin/bash
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

[ -n "$DEBUG" ] && set -o xtrace

SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

RUNTIME=${RUNTIME:-kata-runtime}

jaeger_server=${jaeger_server:-localhost}
jaeger_ui_port=${jaeger_ui_port:-16686}
jaeger_docker_container_name="jaeger"

# Run an operation to generate Jaeger trace spans
create_trace()
{
sudo docker run -ti --runtime "$RUNTIME" --net=none busybox true
}

start_jaeger()
{
local jaeger_docker_image="jaegertracing/all-in-one:latest"

# Clean up from any previous runs
stop_jaeger 2>/dev/null || true

# Defaults - see https://www.jaegertracing.io/docs/getting-started/
docker run -d --runtime runc --name "${jaeger_docker_container_name}" \
-e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \
-p 5775:5775/udp \
-p 6831:6831/udp \
-p 6832:6832/udp \
-p 5778:5778 \
-p "${jaeger_ui_port}:${jaeger_ui_port}" \
-p 14268:14268 \
-p 9411:9411 \
"$jaeger_docker_image"
}

stop_jaeger()
{
docker stop "${jaeger_docker_container_name}"
docker rm -f "${jaeger_docker_container_name}"
}

# Returns status from Jaeger web UI
get_jaeger_status()
{
local service="$1"

[ -z "$service" ] && die "need jaeger service name"

local attempt=0
local status=""

while [ $attempt -lt 10 ]
do
status=$(curl -s "http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}" 2>/dev/null)
local ret=$?

[ "$ret" -eq 0 ] && [ -n "$status" ] && break

attempt=$((attempt+1))
sleep 0.1
done

echo "$status"
}

# Look for any "dangling" spans that have not been reported to the Jaeger
# agent.
check_missing_spans()
{
local service="$1"
local min_spans="$2"

[ -z "$service" ] && die "need jaeger service name"
[ -z "$min_spans" ] && die "need minimum trace span count"

local logfile=$(mktemp)

sudo journalctl -q -o cat -a -t "$service" > "$logfile"
sudo chown "$USER:" "$logfile"

# This message needs to be logged by each Kata component, generally when
# debug is enabled.
local component_prefix="created span"

# Message prefix added by Jaeger when LogSpans=true
# (see
# https://godoc.org/github.com/uber/jaeger-client-go/config#ReporterConfig).
local jaeger_reporter_prefix="Reporting span"

local logged_spans=$(grep -E -o "${component_prefix} [^ ][^ ]*" "$logfile" | awk '{print $3}')

if [ -z "$logged_spans" ]
then
info "No logged spans to check"
rm -f "$logfile"
return
fi

local count=0

for span in $logged_spans
do
count=$((count+1))

# Remove quotes
span=$(echo $span|tr -d '"')

grep -E -q "${jaeger_reporter_prefix} \<$span\>" "$logfile" || \
die "span $count ($span) not reported"
done

[ "$count" -lt "$min_spans" ] && \
die "expected >= $min_spans reported spans, got $count"

info "All $count spans reported"

rm -f "$logfile"
}

# Check Jaeger spans for the specified service.
check_jaeger_status()
{
local service="$1"
local min_spans="$2"

[ -z "$service" ] && die "need jaeger service name"
[ -z "$min_spans" ] && die "need minimum trace span count"

local status
local errors=0

local attempt=0
local attempts=3

while [ "$attempt" -lt "$attempts" ]
do
status=$(get_jaeger_status "$service")

#------------------------------
# Basic sanity checks
[ -z "$status" ] && die "failed to query status via HTTP"

local span_lines=$(echo "$status"|jq -S '.data[].spans | length')
[ -z "$span_lines" ] && die "no span status"

local span_lines_count=$(echo "$span_lines"|wc -l)

# Total up all span counts
local spans=$(echo "$span_lines"|paste -sd+ -|bc)
[ -z "$spans" ] && die "no spans"

# Ensure total span count is numeric
echo "$spans"|grep -q "^[0-9][0-9]*$"
[ $? -eq 0 ] || die "invalid span count: '$spans'"

info "found $spans spans (across $span_lines_count traces)"

# Validate
[ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans"

# Look for common errors in span data
local errors1=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span")
if [ -n "$errors1" ]
then
errors=$((errors+1))
warn "Found invalid parent span errors (attempt $attempt): $errors1"
continue
else
errors=$((errors-1))
[ "$errors" -lt 0 ] && errors=0
fi

# Crude but it works
local errors2=$(echo "$status"|jq -S . 2>/dev/null|grep "\"warnings\""|grep -E -v "\<null\>")
if [ -n "$errors2" ]
then
errors=$((errors+1))
warn "Found warnings (attempt $attempt): $errors2"
continue
else
errors=$((errors-1))
[ "$errors" -lt 0 ] && errors=0
fi

attempt=$((attempt+1))

[ "$errors" -eq 0 ] && break
done

[ "$errors" -gt 0 ] && die "errors still detected after $attempts attempts"
}

run_test()
{
local min_spans="$1"
local service="$2"

[ -z "$min_spans" ] && die "need minimum span count"
[ -z "$service" ] && die "need service name"

start_jaeger

create_trace

check_jaeger_status "$service" "$min_spans"

check_missing_spans "$service" "$min_spans"

stop_jaeger

info "test passed"
}

main()
{
runtime_min_spans=10
shim_min_spans=10

# Name of Jaeger "services" (aka Kata components) to check trace for.
runtime_service="kata-runtime"
shim_service="kata-shim"

.ci/configure_tracing_for_kata.sh enable
run_test "$runtime_min_spans" "$runtime_service"
run_test "$shim_min_spans" "$shim_service"
.ci/configure_tracing_for_kata.sh disable
}

main "$@"

0 comments on commit f75074b

Please sign in to comment.