Reviewer note: the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy. Verify whitespace against the target tree, or
apply with `git apply --ignore-whitespace`.
Dockerfile: the redundant `COPY . .` was dropped (it baked the whole build
context into /tmp of the final image although the two COPY lines below already
read from the build context). Hunk counts were adjusted accordingly, so the
post-image blob id on the Dockerfile index line is stale.

diff --git a/Dockerfile b/Dockerfile
index 05c1cd6..1c3d952 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,15 @@
-# Builder stage
-FROM registry.ci.openshift.org/ocp/builder:rhel-8-golang-1.20-openshift-4.16 AS builder
-WORKDIR /go/src/github.com/red-hat-storage/odf-must-gather
+FROM registry.ci.openshift.org/ocp/4.16:cli
 
-COPY . .
-ENV GO_PACKAGE github.com/red-hat-storage/odf-must-gather
+WORKDIR /tmp
 
-# Prod stage
-FROM registry.ci.openshift.org/ocp/4.16:cli
-
 RUN mkdir -p /templates
-COPY --from=builder /go/src/github.com/red-hat-storage/odf-must-gather/collection-scripts/* /usr/bin/
-COPY --from=builder /go/src/github.com/red-hat-storage/odf-must-gather/templates/* /templates/
+
+COPY collection-scripts/* /usr/bin/
+COPY templates/* /templates/
 
 # We do not need it as of now
 # jq is not preinstalled on openshift/origin-cli either
 # Removing this step makes local development easier.
 # RUN yum install --setopt=tsflags=nodocs -y jq && yum clean all && rm -rf /var/cache/yum/*
 
-ENTRYPOINT /usr/bin/gather
\ No newline at end of file
+ENTRYPOINT ["/usr/bin/gather"]
diff --git a/Makefile b/Makefile
index 3573038..d5404b5 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ endif
 	@oc login --token=$(ODF_MG_TOKEN) --server=https://api.ci.l2s4.p1.openshiftapps.com:6443 1>/dev/null
 
 	@echo "Logging into the CI image registry..."
-	@oc registry login --registry registry.ci.openshift.org &>/dev/null
+	@REGISTRY_AUTH_FILE=~/.docker/config.json oc registry login --registry registry.ci.openshift.org 2>/dev/null
 
 	@echo "Will be using $(IMG_TAG) as the image tag, you can change this using IMG_TAG env var."
 
Reviewer note: the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy. Verify whitespace against the target tree, or
apply with `git apply --ignore-whitespace`.
gather_namespaced_resources: fixed the "volumegroupnapshot" typo (the resource is
"volumegroupsnapshot") and made the oc_get_all loop write the get / describe /
yaml outputs to separate files instead of appending all three to one file.
The second hunk header was recounted, so the post-image blob id on that index
line is stale.

diff --git a/collection-scripts/gather b/collection-scripts/gather
index e069369..d40356f 100755
--- a/collection-scripts/gather
+++ b/collection-scripts/gather
@@ -132,6 +132,9 @@ START_TIME=$(date +%r)
 start=$(date +%s)
 dbglog "collection started at: ${START_TIME}"
 
+# Print and export must-gather pod details
+export_pod_image_details
+
 if [ -n "${LOG_FILTER_ARGS:-}" ]; then
     dbglog "Logs will be filtered using: ${LOG_FILTER_ARGS}"
 fi
diff --git a/collection-scripts/gather_ceph_logs b/collection-scripts/gather_ceph_logs
index 815cf18..397b297 100755
--- a/collection-scripts/gather_ceph_logs
+++ b/collection-scripts/gather_ceph_logs
@@ -27,6 +27,12 @@ for ns in $namespaces; do
         oc rsync -n "${ns}" "$(oc get pods -n "${ns}" --no-headers | grep "${node//./}-debug" | awk '{print $1}')":/host/var/lib/rook/"${ns}".nfs.csi.ceph.com/log "${CSI_LOG_OUTPUT_DIR}"
     }
 
+    # collect logs from the csi pods if csi is installed using csi-operator
+    csi_operator_log_collection() {
+        dbglog "collecting csi logs from node ${node}"
+        oc rsync -n "${ns}" "$(oc get pods -n "${ns}" --no-headers | grep "${node//./}-debug" | awk '{print $1}')":/host/var/lib/cephcsi "${CSI_LOG_OUTPUT_DIR}"
+    }
+
     crash_core_collection() {
         dbglog "collecting crash core dump from node ${node}"
         oc rsync -n "${ns}" "$(oc get pods -n "${ns}" --no-headers | grep "${node//./}"-debug | awk '{print $1}')":/host/var/lib/rook/"${ns}"/crash/ "${CRASH_OUTPUT_DIR}"
@@ -66,8 +72,11 @@ CMDS
         mkdir -p "${KERNEL_OUTPUT_DIR}"
         mkdir -p "${COREDUMP_OUTPUT_DIR}"
         ceph_daemon_log_collection &
+        pids_log+=($!)
         csi_log_collection &
         pids_log+=($!)
+        csi_operator_log_collection &
+        pids_log+=($!)
         crash_core_collection &
         pids_log+=($!)
         journal_collection &
diff --git a/collection-scripts/gather_namespaced_resources b/collection-scripts/gather_namespaced_resources
index 75a92a9..38067b7 100755
--- a/collection-scripts/gather_namespaced_resources
+++ b/collection-scripts/gather_namespaced_resources
@@ -74,6 +74,29 @@ oc_yamls+=("alertmanager")
 oc_yamls+=("prometheus")
 oc_yamls+=("alertmanagerconfig")
 
+# collect get output of OC commands for all namespaces
+oc_get_all=()
+oc_get_all+=("pvc")
+oc_get_all+=("volumesnapshot")
+oc_get_all+=("volumegroupsnapshot")
+oc_get_all+=("obc")
+oc_get_all+=("volumereplication")
+oc_get_all+=("volumereplicationgroups")
+oc_get_all+=("csiaddonsnode")
+oc_get_all+=("storageclaim")
+oc_get_all+=("managedfusionoffering")
+oc_get_all+=("reclaimspacejob")
+oc_get_all+=("reclaimspacecronjobs")
+oc_get_all+=("networkfence")
+oc_get_all+=("network-attachment-definitions")
+oc_get_all+=("encryptionkeyrotationjob")
+oc_get_all+=("encryptionkeyrotationcronjob")
+oc_get_all+=("operatorconfigs.csi.ceph.io")
+oc_get_all+=("drivers.csi.ceph.io")
+oc_get_all+=("cephconnections.csi.ceph.io")
+oc_get_all+=("clientprofiles.csi.ceph.io")
+oc_get_all+=("clientprofilemappings.csi.ceph.io")
+
 for INSTALL_NAMESPACE in $PRODUCT_NAMESPACE $INSTALL_NAMESPACES $MANAGED_FUSION_NAMESPACE $OPERATOR_NAMESPACE; do
     dbglog "collecting dump of namespace ${INSTALL_NAMESPACE}"
     { oc adm inspect --dest-dir="${BASE_COLLECTION_PATH}" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} ns/"${INSTALL_NAMESPACE}" 2>&1; } | dbglog
@@ -125,99 +148,19 @@ for resource in "${resources[@]}"; do
     { oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} "${resource}" 2>&1; } | dbglog
 done
 
-# For pvc of all namespaces
-dbglog "collecting dump of oc get pvc all namespaces"
-{ oc get pvc --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/pvc_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} pvc 2>&1; } | dbglog
-
-# For volumesnapshot of all namespaces
-dbglog "collecting dump of oc get volumesnapshot all namespaces"
-{ oc get volumesnapshot --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_volumesnapshot_all_namespaces"
-{ oc describe volumesnapshot --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_volumesnapshot_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} volumesnapshot 2>&1; } | dbglog
-
-# For volumegroupsnapshot of all namespaces
-dbglog "collecting dump of oc get volumegroupsnapshot all namespaces"
-{ oc get volumegroupsnapshot --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_volumegroupsnapshot_all_namespaces"
-{ oc describe volumegroupsnapshot --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_volumegroupsnapshot_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} volumegroupsnapshot 2>&1; } | dbglog
-
-# For obc of all namespaces
-dbglog "collecting dump of oc get obc all namespaces"
-{ oc get obc --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/obc_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} obc 2>&1; } | dbglog
-
-# For VolumeReplication of all namespaces
-dbglog "collecting dump of oc get volumereplication all namespaces"
-{ oc get volumereplication --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/vr_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} volumereplication 2>&1; } | dbglog
-
-# For VolumeReplicationGroups of all namespaces
-dbglog "collecting dump of oc get volumereplicationgroups all namespaces"
-{ oc get volumereplicationgroups --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/vrg_all_namespaces"
-{ oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} vrg 2>&1; } | dbglog
-
-# Collect details of storageclaim of all namespaces for managed services
-dbglog "collecting dump of oc get storageclaim all namespaces"
-if [ -n "$(oc get storageclaim --no-headers -A | awk '{print $2}')" ]; then
-    { oc get storageclaim --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_storageclaim_all_ns"
-    { oc describe storageclaim --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_storageclaim_all_ns"
-    { oc get storageclaim -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_storageclaim_all_ns"
-fi
-
-# Collect details of managedfusionoffering of all namespaces for managed services
-dbglog "collecting dump of oc get managedfusionoffering all namespaces"
-if [ -n "$(oc get managedfusionoffering --no-headers -A | awk '{print $1}')" ]; then
-    { oc get managedfusionoffering --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_managedfusionoffering_all_ns"
-    { oc describe managedfusionoffering --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_managedfusionoffering_all_ns"
-    { oc get managedfusionoffering -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_managedfusionoffering_all_ns"
-fi
-
-# Collect csi-addons object details of all namespaces
-dbglog "collecting dump of oc get csiaddonsnode all namespaces"
-if [ -n "$(oc get csiaddonsnode --no-headers -A | awk '{print $2}')" ]; then
-    { oc get csiaddonsnode --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_csiaddonsnode_all_ns"
-    { oc describe csiaddonsnode --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_csiaddonsnode_all_ns"
-    { oc get csiaddonsnode -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_csiaddonsnode_all_ns"
-fi
-
-dbglog "collecting dump of oc get reclaimspacejob all namespaces"
-if [ -n "$(oc get reclaimspacejob --no-headers -A | awk '{print $1}')" ]; then
-    { oc get reclaimspacejob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_reclaimspacejob_all_ns"
-    { oc describe reclaimspacejob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_reclaimspacejob_all_ns"
-    { oc get reclaimspacejob -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_reclaimspacejob_all_ns"
-fi
-
-dbglog "collecting dump of oc get reclaimspacecronjobs all namespaces"
-if [ -n "$(oc get reclaimspacecronjobs --no-headers -A | awk '{print $1}')" ]; then
-    { oc get reclaimspacecronjobs --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_reclaimspacecronjobs_all_ns"
-    { oc describe reclaimspacecronjobs --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_reclaimspacecronjobs_all_ns"
-    { oc get reclaimspacecronjobs -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_reclaimspacecronjobs_all_ns"
-fi
-
-dbglog "collecting dump of oc get networkfence all namespaces"
-if [ -n "$(oc get networkfence --no-headers -A | awk '{print $1}')" ]; then
-    { oc get networkfence --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_networkfence_all_ns"
-    { oc describe networkfence --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_networkfence_all_ns"
-    { oc get networkfence -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_networkfence_all_ns"
-fi
-
-dbglog "collecting network-attachment-definitions of oc get network-attachment-definitions all namespaces"
-if [ -n "$(oc get network-attachment-definitions --no-headers -A | awk '{print $1}')" ]; then
-    { oc get network-attachment-definitions -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_net_attach_def_all_ns"
-    { oc describe network-attachment-definitions --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_net_attach_def_all_ns"
-fi
-
-dbglog "collecting dump of oc get encryptionkeyrotationjob all namespaces"
-if [ -n "$(oc get encryptionkeyrotationjob --no-headers -A | awk '{print $1}')" ]; then
-    { oc get encryptionkeyrotationjob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_encryptionkeyrotationjob_all_ns"
-    { oc describe encryptionkeyrotationjob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_encryptionkeyrotationjob_all_ns"
-    { oc get encryptionkeyrotationjob -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_encryptionkeyrotationjob_all_ns"
-fi
-
-dbglog "collecting dump of oc get encryptionkeyrotationcronjob all namespaces"
-if [ -n "$(oc get encryptionkeyrotationcronjob --no-headers -A | awk '{print $1}')" ]; then
-    { oc get encryptionkeyrotationcronjob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_encryptionkeyrotationcronjob_all_ns"
-    { oc describe encryptionkeyrotationcronjob --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/desc_encryptionkeyrotationcronjob_all_ns"
-    { oc get encryptionkeyrotationcronjob -oyaml --all-namespaces; } >>"${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_encryptionkeyrotationcronjob_all_ns"
-fi
+# Run the collection of oc_get_all: for every resource type, store the
+# "oc get" table, the "oc describe" output and the YAML dump in separate
+# get_/desc_/get_yaml_ files (so the YAML dump stays parseable on its own)
+# and run "oc adm inspect" on the resource across all namespaces.
+for oc_get in "${oc_get_all[@]}"; do
+    OUTPUT_NAME=${oc_get// /_}
+    dbglog "collecting oc get command ${oc_get}"
+    COMMAND_OUTPUT_FILE=${BASE_COLLECTION_PATH}/namespaces/all/get_${OUTPUT_NAME}_all_ns
+    { oc get "${oc_get}" --all-namespaces; } >>"${COMMAND_OUTPUT_FILE}"
+    dbglog "collecting oc describe command ${oc_get}"
+    COMMAND_OUTPUT_FILE=${BASE_COLLECTION_PATH}/namespaces/all/desc_${OUTPUT_NAME}_all_ns
+    { oc describe "${oc_get}" --all-namespaces; } >>"${COMMAND_OUTPUT_FILE}"
+    { oc adm inspect --all-namespaces --dest-dir="${BASE_COLLECTION_PATH}/namespaces/all/" ${LOG_FILTER_ARGS:+"${LOG_FILTER_ARGS}"} "${oc_get}" 2>&1; } | dbglog
+    COMMAND_OUTPUT_FILE=${BASE_COLLECTION_PATH}/namespaces/all/get_yaml_${OUTPUT_NAME}_all_ns
+    { oc get "${oc_get}" -oyaml --all-namespaces; } >>"${COMMAND_OUTPUT_FILE}"
+done
diff --git a/collection-scripts/utils.sh b/collection-scripts/utils.sh
index e9892a6..f44bda7 100755
--- a/collection-scripts/utils.sh
+++ b/collection-scripts/utils.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# shellcheck disable=SC2155
 
 #
 # Please Note:
@@ -79,7 +80,33 @@ parse_since_time() {
     fi
 }
 
+# export_pod_image_details fetches the pod metadata
+# using kubernetes API
+function export_pod_image_details() {
+    # We do not override the hostname in odf-mg, hence hostname = pod name
+    local POD_NAME=$(hostname)
+
+    # Kubernetes API token and endpoints
+    local API_URL="https://kubernetes.default.svc"
+    local NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
+    local TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
+
+    # Get pod metadata
+    local POD_METADATA=$(curl -s --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \
+        -H "Authorization: Bearer $TOKEN" \
+        "$API_URL/api/v1/namespaces/$NAMESPACE/pods/$POD_NAME")
+
+    # Extract image details
+    local IMAGE=$(echo "$POD_METADATA" | awk -F'"' '/"image":/ {print $4; exit}')
+
+    # Also save the pod metadata to a file
+    echo "$POD_METADATA" >"${BASE_COLLECTION_PATH}"/pod-metadata.json
+
+    dbglog "must-gather is using image: $IMAGE"
+}
+
 # Export the functions so that the file needs to be sourced only once
 export -f dbglog
 export -f dbglogf
 export -f parse_since_time
+export -f export_pod_image_details
diff --git a/templates/pod.template b/templates/pod.template
index 9147e43..24f6377 100755
--- a/templates/pod.template
+++ b/templates/pod.template
@@ -29,7 +29,7 @@ spec:
             name: rook-ceph-mon
             key: ceph-secret
     image: IMAGE_NAME
-    imagePullPolicy: IfNotPresent
+    imagePullPolicy: Always
     name: must-gather-helper
     securityContext:
       privileged: true