From 82d333eb0011597cc2a020b37a6f8af83a01507b Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Thu, 17 May 2018 13:43:10 -0400
Subject: [PATCH] Set up artifact reporting for ci-operator jobs

Remove the fixed artifact_dir from the default decoration config and hand
ci-operator --artifact-dir=$(ARTIFACTS) instead. Refer to the ci-operator
image as ci-operator:latest and switch on local lookup for its image
stream.

The cluster-launch-e2e template gets an "artifacts" emptyDir mounted at
/tmp/artifacts in the container that runs the tests and in the container
that runs the install. The e2e output and junit reports are written
there, and the EXIT handler of the install container collects node, pod,
event and metrics data before it deprovisions the cluster. Collection is
skipped when /tmp/shared/admin.kubeconfig was never written;
deprovisioning always runs.

The test script now works on a private copy of the admin kubeconfig and
passes TEST_SKIP to ginkgo as -ginkgo.skip rather than as a second
-ginkgo.focus.
---
Changes against the first revision of this patch:
- teardown no longer gathers artifacts when no kubeconfig exists
- journal archives are named per journal directory instead of all being
  written to journal.gz
- TEST_SKIP is passed as -ginkgo.skip
- build.yaml now ends with a newline
The index lines still carry the post-image blob ids of the first revision.

 cluster/ci/config/prow/config.yaml            |  9 +-
 .../config/prow/jobs/cluster-launch-e2e.yaml  | 82 +++++++++++++++++--
 .../prow/openshift/ci-operator/build.yaml     |  5 +-
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/cluster/ci/config/prow/config.yaml b/cluster/ci/config/prow/config.yaml
index 67cb2c01c36d..9fee54cb60d1 100644
--- a/cluster/ci/config/prow/config.yaml
+++ b/cluster/ci/config/prow/config.yaml
@@ -4,7 +4,6 @@ plank:
   default_decoration_config:
     timeout: 7200000000000 # 2h
     grace_period: 15000000000 # 15s
-    artifact_dir: /tmp/artifacts
     utility_images:
       clonerefs: "registry.svc.ci.openshift.org/ci/clonerefs:latest-json"
       initupload: "registry.svc.ci.openshift.org/ci/initupload:latest-json"
@@ -1025,7 +1024,7 @@ presubmits:
       serviceAccountName: ci-operator
       containers:
       - name: test
-        image: docker-registry.default.svc:5000/ci/ci-operator:latest
+        image: ci-operator:latest
         env:
         - name: CONFIG_SPEC
           valueFrom:
@@ -1036,7 +1035,7 @@ presubmits:
         - ci-operator
         args:
         - --delete-when-idle=10m
-        - --artifact-dir=/tmp/artifacts
+        - --artifact-dir=$(ARTIFACTS)
         - --dry-run=false
         - --target=unit
   - name: pull-ci-image-registry-e2e
@@ -1063,7 +1062,7 @@ presubmits:
               name: cluster-profile-gcp
       containers:
       - name: test
-        image: docker-registry.default.svc:5000/ci/ci-operator:latest
+        image: ci-operator:latest
         volumeMounts:
         - name: job-definition
           mountPath: /usr/local/e2e-gcp
@@ -1093,7 +1092,7 @@ presubmits:
           export RPM_REPO="$( curl -q "${RPM_REPO_BASEURL_REF}" 2>/dev/null)"
           ci-operator \
             --delete-when-idle=10m --dry-run=false \
-            --artifact-dir=/tmp/artifacts \
+            --artifact-dir=$(ARTIFACTS) \
             --secret-dir=/usr/local/e2e-gcp-cluster-profile --template=/usr/local/e2e-gcp \
             --target=e2e-gcp
 
diff --git a/cluster/ci/config/prow/jobs/cluster-launch-e2e.yaml b/cluster/ci/config/prow/jobs/cluster-launch-e2e.yaml
index 77e4e0f619b7..05f84ddc7b53 100644
--- a/cluster/ci/config/prow/jobs/cluster-launch-e2e.yaml
+++ b/cluster/ci/config/prow/jobs/cluster-launch-e2e.yaml
@@ -46,6 +46,8 @@ objects:
     activeDeadlineSeconds: 7200
     terminationGracePeriodSeconds: 600
     volumes:
+    - name: artifacts
+      emptyDir: {}
     - name: shared-tmp
       emptyDir: {}
     - name: cluster-profile
@@ -62,6 +64,8 @@ objects:
         mountPath: /tmp/shared
       - name: cluster-profile
         mountPath: /tmp/cluster
+      - name: artifacts
+        mountPath: /tmp/artifacts
       env:
       - name: HOME
         value: /tmp/home
@@ -75,11 +79,12 @@ objects:
         trap 'touch /tmp/shared/exit' EXIT
         trap 'kill $(jobs -p); exit 0' TERM
 
+        cp "$(which oc)" /tmp/shared/
+
         mkdir -p "${HOME}"
-        export KUBECONFIG=/tmp/shared/admin.kubeconfig
 
         while true; do
-          if [[ ! -f $KUBECONFIG ]]; then
+          if [[ ! -f /tmp/shared/admin.kubeconfig ]]; then
             sleep 15 & wait
             continue
           fi
@@ -87,11 +92,16 @@ objects:
         done
         echo "Found shared kubeconfig"
 
+        # don't let clients impact the global kubeconfig
+        cp /tmp/shared/admin.kubeconfig /tmp/admin.kubeconfig
+        export KUBECONFIG=/tmp/admin.kubeconfig
+
         PATH=/usr/libexec/origin:$PATH
 
         if [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
           export GOOGLE_APPLICATION_CREDENTIALS="/tmp/cluster/gce.json"
           export KUBE_SSH_USER=cloud-user
+          mkdir -p ~/.ssh
           cp /tmp/cluster/ssh-privatekey ~/.ssh/google_compute_engine || true
           export PROVIDER_ARGS='-provider=gce -gce-zone=us-east1-c -gce-project=openshift-gce-devel-ci'
         fi
@@ -102,7 +112,7 @@ objects:
         set -x
         ginkgo -v -noColor -nodes=40 $( which extended.test ) -- \
-          -suite "${TEST_SUITE}" -ginkgo.focus="${TEST_FOCUS}" -ginkgo.focus="${TEST_SKIP}" \
-          -e2e-output-dir /tmp/output -report-dir /tmp/output/junit \
+          -suite "${TEST_SUITE}" -ginkgo.focus="${TEST_FOCUS}" -ginkgo.skip="${TEST_SKIP}" \
+          -e2e-output-dir /tmp/artifacts -report-dir /tmp/artifacts/junit \
           -test.timeout=2h ${PROVIDER_ARGS-}
 
     # Runs an install
@@ -135,6 +145,8 @@ objects:
         mountPath: /tmp/shared
       - name: cluster-profile
         mountPath: /usr/share/ansible/openshift-ansible/inventory/dynamic/injected
+      - name: artifacts
+        mountPath: /tmp/artifacts
       env:
       - name: INSTANCE_PREFIX
         value: ${NAMESPACE}-${JOB_NAME_HASH}
@@ -143,7 +155,67 @@ objects:
       - -c
       - |
         #!/bin/bash
-        trap '/usr/local/bin/entrypoint-${CLUSTER_TYPE} ansible-playbook -vv playbooks/${CLUSTER_TYPE}/openshift-cluster/deprovision.yml' EXIT
+        # Best-effort dump of node, pod, event and metrics data from the cluster
+        # selected by KUBECONFIG into /tmp/artifacts.
+        function gather_artifacts() {
+          echo "Gathering artifacts ..."
+          mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
+
+          oc get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
+          oc get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
+          oc get nodes -o json > /tmp/artifacts/nodes.json
+          oc get events --all-namespaces -o json > /tmp/artifacts/events.json
+          oc get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
+
+          # every line of /tmp/pods-api reads "-n NAMESPACE POD"; $i stays
+          # unquoted so that it splits into separate oc arguments
+          while IFS= read -r i; do
+            file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )"
+            oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-api.gz
+            oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-controllers.gz
+          done < /tmp/pods-api
+          # every line of /tmp/containers reads "-n NAMESPACE POD -c CONTAINER"
+          while IFS= read -r i; do
+            file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )"
+            oc logs $i | gzip -c > /tmp/artifacts/pods/${file}.log.gz
+            oc logs -p $i | gzip -c > /tmp/artifacts/pods/${file}_previous.log.gz
+          done < /tmp/containers
+          # every line of /tmp/nodes is one node name
+          while IFS= read -r i; do
+            mkdir -p /tmp/artifacts/nodes/$i
+            oc get --raw /api/v1/nodes/$i/proxy/metrics | gzip -c > /tmp/artifacts/metrics/node-$i.gz
+            oc get --raw /api/v1/nodes/$i/proxy/debug/pprof/heap | gzip -c > /tmp/artifacts/nodes/$i/heap.gz
+            oc get --raw /api/v1/nodes/$i/proxy/logs/messages | gzip -c > /tmp/artifacts/nodes/$i/messages.gz
+            oc get --raw /api/v1/nodes/$i/proxy/logs/secure | gzip -c > /tmp/artifacts/nodes/$i/secure.gz
+            oc get --raw /api/v1/nodes/$i/proxy/logs/audit | gzip -c > /tmp/artifacts/nodes/$i/audit.gz
+            oc get --raw /api/v1/nodes/$i/proxy/logs/journal | sed -e 's|.*href="\(.*\)".*|\1|;t;d' > /tmp/journals
+            # name each archive after its journal directory so that several
+            # directories on one node do not overwrite a single journal.gz
+            while IFS= read -r j; do
+              oc get --raw /api/v1/nodes/$i/proxy/logs/journal/${j}system.journal | gzip -c > /tmp/artifacts/nodes/$i/journal-${j%/}.gz
+            done < /tmp/journals
+          done < /tmp/nodes
+        }
+
+        # EXIT handler: gathers artifacts if the install produced a kubeconfig,
+        # then deprovisions the cluster in every case.
+        function teardown() {
+          # a failing command below must not stop the deprovision step
+          set +e
+          export KUBECONFIG=/tmp/shared/admin.kubeconfig
+          # with no kubeconfig there is no launched cluster to query, and oc could
+          # fall back to another default configuration - skip gathering then
+          if [[ -f /tmp/shared/admin.kubeconfig ]]; then
+            gather_artifacts
+          else
+            echo "No /tmp/shared/admin.kubeconfig found, skipping artifact gathering"
+          fi
+
+          echo "Deprovisioning cluster ..."
+          /usr/local/bin/entrypoint-${CLUSTER_TYPE} ansible-playbook -vv playbooks/${CLUSTER_TYPE}/openshift-cluster/deprovision.yml
+        }
+
+        trap 'teardown' EXIT
         trap 'kill $(jobs -p); exit 0' TERM
 
         for i in `seq 1 120`; do
diff --git a/cluster/ci/config/prow/openshift/ci-operator/build.yaml b/cluster/ci/config/prow/openshift/ci-operator/build.yaml
index 551ea6275934..40b37fec259a 100644
--- a/cluster/ci/config/prow/openshift/ci-operator/build.yaml
+++ b/cluster/ci/config/prow/openshift/ci-operator/build.yaml
@@ -40,4 +40,7 @@ items:
   kind: ImageStream
   metadata:
     name: ci-operator
-    namespace: ci
\ No newline at end of file
+    namespace: ci
+  spec:
+    lookupPolicy:
+      local: true