Skip to content

Commit

Permalink
Merge pull request #1464 from adriangonz/integration-tests-tracing
Browse files Browse the repository at this point in the history
Integration tests tracing
  • Loading branch information
seldondev authored Feb 27, 2020
2 parents cd23be3 + e71614c commit 98d9435
Show file tree
Hide file tree
Showing 10 changed files with 293 additions and 47 deletions.
File renamed without changes.
95 changes: 95 additions & 0 deletions testing/resources/graph-tracing.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{
"apiVersion": "machinelearning.seldon.io/v1alpha2",
"kind": "SeldonDeployment",
"metadata": {
"labels": {
"app": "seldon"
},
"name": "mymodel"
},
"spec": {
"name": "mymodel",
"oauth_key": "oauth-key",
"oauth_secret": "oauth-secret",
"predictors": [
{
"svcOrchSpec": {
"env": [
{
"name": "SELDON_LOG_LEVEL",
"value": "DEBUG"
},
{
"name": "TRACING",
"value": "1"
},
{
"name": "JAEGER_AGENT_HOST",
"value": "jaeger-agent.seldon"
},
{
"name": "JAEGER_AGENT_PORT",
"value": "5775"
},
{
"name": "JAEGER_SAMPLER_TYPE",
"value": "const"
},
{
"name": "JAEGER_SAMPLER_PARAM",
"value": "1"
}
]
},
"componentSpecs": [
{
"spec": {
"containers": [
{
"image": "seldonio/fixed-model:0.1",
"imagePullPolicy": "IfNotPresent",
"env": [
{ "name": "TRACING", "value": "1" },
{
"name": "JAEGER_AGENT_HOST",
"value": "jaeger-agent.seldon"
},
{
"name": "JAEGER_AGENT_PORT",
"value": "5775"
},
{
"name": "JAEGER_SAMPLER_TYPE",
"value": "const"
},
{
"name": "JAEGER_SAMPLER_PARAM",
"value": "1"
}
],
"name": "complex-model",
"resources": {
"requests": {
"memory": "1Mi"
}
}
}
],
"terminationGracePeriodSeconds": 1
}
}
],
"graph": {
"children": [],
"name": "complex-model",
"endpoint": {
"type": "REST"
},
"type": "MODEL"
},
"name": "mymodel",
"replicas": 1
}
]
}
}
21 changes: 21 additions & 0 deletions testing/resources/jaeger.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# Jaeger custom resource: requests a Jaeger instance named `jaeger` using the
# `allInOne` deployment strategy.
apiVersion: jaegertracing.io/v1
kind: Jaeger
metadata:
name: jaeger
spec:
strategy: allInOne
allInOne:
options:
query:
# Serve the query service under `/jaeger`; this matches the `prefix` of the
# Mapping defined below.
base-path: /jaeger
---
# Ambassador Mapping: routes requests whose path starts with `/jaeger` to the
# `jaeger-query.seldon` service on port 16686.
apiVersion: getambassador.io/v1
kind: Mapping
metadata:
name: jaeger
spec:
prefix: /jaeger
service: jaeger-query.seldon:16686
# The query service is configured with `base-path: /jaeger` above, so it
# expects the `/jaeger` prefix on incoming requests. The empty `rewrite`
# keeps the prefix in place instead of rewriting it.
rewrite: ""
31 changes: 31 additions & 0 deletions testing/resources/kind_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# kind cluster definition, passed to `kind create cluster --config`.
kind: Cluster
apiVersion: kind.sigs.k8s.io/v1alpha3
# Two nodes: one control-plane and one worker.
nodes:
- role: control-plane
- role: worker
# Container port -> host port mappings: 30080 -> 8003 and 31280 -> 8004.
# NOTE(review): presumably these belong to the worker node listed just above;
# confirm against the file's indentation.
extraPortMappings:
- containerPort: 30080
hostPort: 8003
- containerPort: 31280
hostPort: 8004
# The patch below is a literal block holding a KubeletConfiguration document
# (reserved resources, hard eviction thresholds and feature gates).
kubeadmConfigPatches:
- |
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
metadata:
name: config
kubeReserved:
cpu: "300m"
memory: "300Mi"
ephemeral-storage: "1Gi"
kubeReservedCgroup: "/kube-reserved"
systemReserved:
cpu: "300m"
memory: "300Mi"
ephemeral-storage: "1Gi"
evictionHard:
memory.available: "200Mi"
nodefs.available: "10%"
featureGates:
DynamicKubeletConfig: true
RotateKubeletServerCertificate: true
22 changes: 19 additions & 3 deletions testing/scripts/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ VERSION := $(shell cat ../../version.txt)
PYTEST_WORKERS ?= "4"

kind_create_cluster:
kind create cluster --config kind_config.yaml
kind create cluster --config ../resources/kind_config.yaml

kind_delete_cluster:
kind delete cluster
Expand All @@ -27,10 +27,26 @@ kind_build_images: build_protos kind_build_engine kind_build_operator kind_build
helm_setup:
helm repo add stable https://kubernetes-charts.storage.googleapis.com/
helm repo add seldonio https://storage.googleapis.com/seldon-charts
helm repo add jaegertracing https://jaegertracing.github.io/helm-charts
helm repo update

install_ambassador:
helm install --wait ambassador stable/ambassador -f ambassador_values.yaml --set crds.keep=false --namespace seldon --set replicaCount=1
helm install ambassador \
stable/ambassador \
-f ../resources/ambassador_values.yaml \
--set crds.keep=false \
--namespace seldon \
--set replicaCount=1 \
--wait

install_jaeger:
helm install jaeger-operator \
jaegertracing/jaeger-operator \
--set rbac.clusterRole=true \
--namespace seldon \
--wait
kubectl apply -f ../resources/jaeger.yaml --namespace seldon


install_cert_manager:
cd ../../operator && make install-cert-manager
Expand Down Expand Up @@ -61,7 +77,7 @@ create_namespaces:
set_namespace:
kubectl config set-context $$(kubectl config current-context) --namespace=seldon

kind_setup: create_namespaces helm_setup install_ambassador install_istio install_seldon set_namespace
kind_setup: create_namespaces helm_setup install_ambassador install_istio install_jaeger install_seldon set_namespace
port-forward-ambassador:
kubectl port-forward $$(kubectl get pods -n seldon -l app.kubernetes.io/name=ambassador -o jsonpath='{.items[0].metadata.name}') -n seldon 8003:8080

Expand Down
2 changes: 1 addition & 1 deletion testing/scripts/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pytest==5.3.1
pytest-xdist==1.30.0
pytest-cov==2.8.1
flaky==3.6.1
retrying==1.3.3
tenacity==6.0.0

# Second-level dependency of pytest-cov; pinned to avoid the SQLite dependency
coverage==4.5.4
Expand Down
54 changes: 54 additions & 0 deletions testing/scripts/jaeger_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import requests
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_result

from seldon_e2e_utils import API_AMBASSADOR

JAEGER_QUERY_URL = f"http://{API_AMBASSADOR}/jaeger"


def _is_empty(result):
    """Return True when `result` is None or has a length of zero."""
    if result is None:
        return True
    return not len(result)


@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(max=5),
    retry=retry_if_result(_is_empty),
)
def get_traces(pod_name, service, operation, timeout=10):
    """
    Fetch traces for a given pod, service and operation.

    We use Jaeger's [**internal** REST
    API](https://www.jaegertracing.io/docs/1.13/apis/#http-json-internal).
    Therefore, it may stop working at some point!

    Note that this function is attempted up to 5 times in total (with an
    exponentially growing waiting time, capped at 5 seconds) for as long as
    the returned traces are empty (i.e. `None` or zero-length).
    This is to give time to Jaeger to collect and process the trace, which is
    performed asynchronously.

    Parameters
    ----------
    pod_name : str
        We currently don't have access to the PUID (see
        https://github.com/SeldonIO/seldon-core/issues/1460).
        As a workaround, we filter the traces using the Pod name.
    service : str
        Service sending the traces.
        This will usually be the `'executor'`, since it's the one which creates
        the trace.
    operation : str
        Operation which was traced (e.g. `'predictions'`).
    timeout : float, optional
        Number of seconds to wait for Jaeger's query service to respond on
        each attempt. Defaults to 10.

    Returns
    -------
    traces : list
        List of traces, where each trace contains spans, processes, etc.

    Raises
    ------
    tenacity.RetryError
        If the traces are still empty after the last attempt.
    requests.exceptions.RequestException
        If the HTTP request fails or times out. Only empty results trigger a
        retry, so request errors propagate straight away.
    """
    endpoint = f"{JAEGER_QUERY_URL}/api/traces"
    params = {"service": service, "operation": operation, "tag": f"hostname:{pod_name}"}
    # Without a timeout `requests` waits indefinitely, which would hang the
    # whole test run if the query service stops responding.
    response = requests.get(endpoint, params=params, timeout=timeout)
    payload = response.json()
    traces = payload["data"]
    return traces
32 changes: 0 additions & 32 deletions testing/scripts/kind_config.yaml

This file was deleted.

28 changes: 17 additions & 11 deletions testing/scripts/seldon_e2e_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from concurrent.futures import ThreadPoolExecutor, wait
from subprocess import run, Popen
from retrying import retry
from tenacity import retry, wait_exponential, stop_after_attempt
from requests.auth import HTTPBasicAuth

from seldon_core.proto import prediction_pb2
Expand Down Expand Up @@ -137,6 +137,20 @@ def wait_for_status(name, namespace, attempts=20, sleep=5):
time.sleep(sleep)


def get_pod_names(deployment_name, namespace):
    """
    List the names of the pods labelled `app=<deployment_name>`.

    Runs `kubectl get pod` through the shell for the given namespace, parses
    its JSON output and collects `metadata.name` from every returned item.
    A non-zero exit status from `kubectl` raises `CalledProcessError`
    (because of `check=True`).
    """
    kubectl_cmd = f"kubectl get pod -l app={deployment_name} -n {namespace} -o json"
    completed = run(kubectl_cmd, shell=True, check=True, stdout=subprocess.PIPE)
    pod_list = json.loads(completed.stdout)
    return [item["metadata"]["name"] for item in pod_list["items"]]


def rest_request(
model,
namespace,
Expand Down Expand Up @@ -270,11 +284,7 @@ def create_random_data(data_size, rows=1):
return (shape, arr)


@retry(
wait_exponential_multiplier=1000,
wait_exponential_max=10000,
stop_max_attempt_number=5,
)
@retry(wait=wait_exponential(max=10), stop=stop_after_attempt(5))
def rest_request_ambassador(
deployment_name,
namespace,
Expand Down Expand Up @@ -324,11 +334,7 @@ def rest_request_ambassador(
return response


@retry(
wait_exponential_multiplier=1000,
wait_exponential_max=10000,
stop_max_attempt_number=5,
)
@retry(wait=wait_exponential(max=10), stop=stop_after_attempt(5))
def rest_request_ambassador_auth(
deployment_name,
namespace,
Expand Down
Loading

0 comments on commit 98d9435

Please sign in to comment.