From 476fc9a33dfd8775e7888f796b02a568266e1c5d Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Tue, 3 Jan 2023 12:16:58 +0100
Subject: [PATCH] [8.6](backport #3285) fix: remove Helm Chart tests (#3335)

* bump stack version 8.6.0-2d1af9b1
* fix: remove Helm Chart tests (#3285)
* bump stack version 8.7.0-04d5f080
* fix: remove Helm Chart tests
* test: show env vars and versions
* test: set shell flags
* test: show env
* fix: freeze requirements
* docs: add ec2 issue
* fix: issue with local pyenv versions installed
* fix: improve health checks
* fix: decrease debug level
* fix: add some ansible config
* feat: run filebeat to grab the Docker logs
* fix: add run context
* fix: stop stack
* fix: grab docker logs
* fix: stop stack
* fix: grab junit results
* fix: ignore errors
* fix: typo
* fix: grab stack logs from main agent
* fix: env generation and configuration by default
* docs: wrong steps
* fix: update runners
* fix: improve Stack provision
* fix: remove attribute
* fix: get public IP
* fix: lint
* fix: remove docker pull output
* feat: add clean target
* feat: grab Elastic Agent reports
* fix: remove label setting
* chore: Ansible config
* fix: set test logs right path
* fix: retry stack deploy
* feat: grab logs
* fix: set proper label
* feat: print logs
* fix: increase retry time
* test: change log to INFO
* fix: syntax error
* fix: update test versions
* fix: grab logs
* fix: remove folder before moving agent
* fix: change log level
* fix: set command path
* fix: fetch logs
* fix: collect artifacts
* feat: allow overwriting Elastic Stack config
* fix: force binary path
* fix: test only releases
* fix: report junit test results
* fix: set runner IP properly
* test: collect logs in a tar file
* fix: wrong filename
* fix: remove fleet folder
* fix: do not log env
* fix: lint
* fix: proper report folder name
* feat: grab logs in fleet mode
* fix: exclude more folders
* fix: exclude elastic-agent binary
* fix: update teardown playbook
* fix: teardown the environment

Co-authored-by: apmmachine
(cherry picked from commit d5541388a9b2e54b0b8ae23b29ab156127e36664)

* fix: conflicts
* fix: add missing tags

Co-authored-by: apmmachine
Co-authored-by: Ivan Fernandez Calvo
Co-authored-by: Ivan Fernandez Calvo
---
 .ci/.e2e-tests-beats.yaml | 9 -
 .ci/.e2e-tests-daily.yaml | 12 -
 .ci/.e2e-tests.yaml | 12 -
 .ci/Jenkinsfile | 59 +-
 .ci/Makefile | 69 +-
 .ci/README.md | 145 +++-
 .ci/ansible/ansible.cfg | 11 +-
 .ci/ansible/elastic-stack.yml | 5 +
 .ci/ansible/fetch-test-reports.yml | 2 +-
 .ci/ansible/playbook.yml | 4 +-
 .ci/ansible/requirements.txt | 43 +-
 .ci/ansible/tasks/copy_test_files.yml | 1 +
 .ci/ansible/tasks/fetch_test_reports.yml | 64 +-
 .ci/ansible/tasks/runners.yml | 51 +-
 .ci/ansible/tasks/setup_test_script.yml | 82 +-
 .../tasks/setup_test_script_windows.yml | 38 +-
 .ci/ansible/teardown.yml | 22 +-
 .ci/e2eTestingHelmDaily.groovy | 53 --
 .ci/e2eTestingMacosDaily.groovy | 6 +-
 ...-daily-mbp.yml => e2e-testing-mbp-tmp.yml} | 28 +-
 .ci/schedule-daily.groovy | 3 +-
 .ci/scripts/functional-test.sh | 3 +
 .ci/scripts/gen-platform-env-file.py | 40 +
 .ci/scripts/run_filebeat.sh | 84 ++
 .ci/scripts/yq.sh | 33 -
 .github/paths-labeller.yml | 2 -
 .stack-version | 2 +-
 README.md | 4 -
 e2e/TROUBLESHOOTING.md | 1 -
 .../fleet/features/upgrade_agent.feature | 4 +-
 e2e/_suites/fleet/fleet_test.go | 22 +
 e2e/_suites/helm/Makefile | 1 -
 e2e/_suites/helm/README.md | 80 --
 e2e/_suites/helm/features/apm_server.feature | 16 -
 e2e/_suites/helm/features/filebeat.feature
| 17 - e2e/_suites/helm/features/metricbeat.feature | 23 - e2e/_suites/helm/helm_charts_test.go | 754 ------------------ .../autodiscover_test.go | 4 +- internal/common/defaults.go | 19 +- .../compose/profiles/fleet/docker-compose.yml | 16 +- .../elastic-agent/cloud/docker-compose.yml | 2 +- .../services/elastic-agent/docker-compose.yml | 6 +- .../fleet-server/docker-compose.yml | 2 +- .../services/elasticsearch/docker-compose.yml | 6 +- .../services/kibana/docker-compose.yml | 8 +- .../services/metricbeat/docker-compose.yml | 2 +- internal/config/config.go | 8 +- .../base/elasticsearch/deployment.yaml | 2 +- .../base/fleet-server/deployment.yaml | 2 +- .../kubernetes/base/kibana/deployment.yaml | 2 +- internal/deploy/compose.go | 2 + internal/deploy/docker.go | 4 +- internal/deploy/docker_client.go | 25 +- internal/deploy/elastic_package.go | 4 +- internal/deploy/kubernetes.go | 4 +- internal/installer/elasticagent_tar.go | 5 +- internal/installer/elasticagent_tar_macos.go | 3 +- internal/installer/elasticagent_zip.go | 5 +- internal/kibana/server.go | 39 +- internal/utils/retry.go | 4 +- internal/utils/utils.go | 9 +- pkg/downloads/buckets.go | 2 +- pkg/downloads/releases.go | 6 +- 63 files changed, 716 insertions(+), 1280 deletions(-) delete mode 100644 .ci/e2eTestingHelmDaily.groovy rename .ci/jobs/{e2e-testing-helm-daily-mbp.yml => e2e-testing-mbp-tmp.yml} (65%) create mode 100755 .ci/scripts/gen-platform-env-file.py create mode 100755 .ci/scripts/run_filebeat.sh delete mode 100755 .ci/scripts/yq.sh delete mode 100644 e2e/_suites/helm/Makefile delete mode 100644 e2e/_suites/helm/README.md delete mode 100644 e2e/_suites/helm/features/apm_server.feature delete mode 100644 e2e/_suites/helm/features/filebeat.feature delete mode 100644 e2e/_suites/helm/features/metricbeat.feature delete mode 100644 e2e/_suites/helm/helm_charts_test.go diff --git a/.ci/.e2e-tests-beats.yaml b/.ci/.e2e-tests-beats.yaml index 6be125424c..d47bb90742 100644 --- a/.ci/.e2e-tests-beats.yaml +++ b/.ci/.e2e-tests-beats.yaml @@ -1,14 +1,5 @@ --- SUITES: - - suite: "helm" - provider: "docker" - scenarios: - - name: "Filebeat" - tags: "filebeat" - platforms: ["debian_10_amd64"] - - name: "Metricbeat" - tags: "metricbeat" - platforms: ["debian_10_amd64"] - suite: "fleet" scenarios: - name: "Fleet" diff --git a/.ci/.e2e-tests-daily.yaml b/.ci/.e2e-tests-daily.yaml index 77a16afbee..53f97de1b0 100644 --- a/.ci/.e2e-tests-daily.yaml +++ b/.ci/.e2e-tests-daily.yaml @@ -1,17 +1,5 @@ --- SUITES: - - suite: "helm" - provider: "docker" - scenarios: - - name: "APM Server" - tags: "apm-server" - platforms: ["debian_10_amd64"] - - name: "Filebeat" - tags: "filebeat" - platforms: ["debian_10_amd64"] - - name: "Metricbeat" - tags: "metricbeat" - platforms: ["debian_10_amd64"] - suite: "fleet" scenarios: - name: "Fleet" diff --git a/.ci/.e2e-tests.yaml b/.ci/.e2e-tests.yaml index 77a16afbee..53f97de1b0 100644 --- a/.ci/.e2e-tests.yaml +++ b/.ci/.e2e-tests.yaml @@ -1,17 +1,5 @@ --- SUITES: - - suite: "helm" - provider: "docker" - scenarios: - - name: "APM Server" - tags: "apm-server" - platforms: ["debian_10_amd64"] - - name: "Filebeat" - tags: "filebeat" - platforms: ["debian_10_amd64"] - - name: "Metricbeat" - tags: "metricbeat" - platforms: ["debian_10_amd64"] - suite: "fleet" scenarios: - name: "Fleet" diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index 55720d1747..028899c45f 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -55,12 +55,12 @@ pipeline { booleanParam(name: "notifyOnGreenBuilds", defaultValue: false, 
description: "If it's needed to notify to Slack with green builds.") string(name: 'SLACK_CHANNEL', defaultValue: 'observablt-bots', description: 'The Slack channel(s) where errors will be posted. For multiple channels, use a comma-separated list of channels') string(name: 'ELASTIC_AGENT_DOWNLOAD_URL', defaultValue: '', description: 'If present, it will override the download URL for the Elastic agent artifact. (I.e. https://snapshots.elastic.co/8.0.0-59098054/downloads/beats/elastic-agent/elastic-agent-8.0.0-SNAPSHOT-linux-x86_64.tar.gz') - string(name: 'ELASTIC_AGENT_VERSION', defaultValue: '8.6.0-55d181cf-SNAPSHOT', description: 'SemVer version of the Elastic Agent to be used for the tests. You can use here the tag of your PR to test your changes') - string(name: 'BEAT_VERSION', defaultValue: '8.6.0-55d181cf-SNAPSHOT', description: 'SemVer version of the Beat to be used for the tests. You can use here the tag of your PR to test your changes') - choice(name: 'LOG_LEVEL', choices: ['TRACE', 'DEBUG', 'INFO'], description: 'Log level to be used') + string(name: 'ELASTIC_AGENT_VERSION', defaultValue: '8.6.0-2d1af9b1-SNAPSHOT', description: 'SemVer version of the Elastic Agent to be used for the tests. You can use here the tag of your PR to test your changes') + string(name: 'BEAT_VERSION', defaultValue: '8.6.0-2d1af9b1-SNAPSHOT', description: 'SemVer version of the Beat to be used for the tests. You can use here the tag of your PR to test your changes') + choice(name: 'LOG_LEVEL', choices: ['INFO', 'DEBUG', 'TRACE'], description: 'Log level to be used') choice(name: 'TIMEOUT_FACTOR', choices: ['5', '3', '7', '11'], description: 'Max number of minutes for timeout backoff strategies') string(name: 'KIBANA_VERSION', defaultValue: '', description: 'Docker tag of the kibana to be used for the tests. 
It will refer to an image related to a Kibana PR, under the Observability-CI namespace') - string(name: 'STACK_VERSION', defaultValue: '8.6.0-55d181cf-SNAPSHOT', description: 'SemVer version of the stack to be used for the tests.') + string(name: 'STACK_VERSION', defaultValue: '8.6.0-2d1af9b1-SNAPSHOT', description: 'SemVer version of the stack to be used for the tests.') string(name: 'HELM_CHART_VERSION', defaultValue: '7.17.3', description: 'SemVer version of Helm chart to be used.') string(name: 'HELM_VERSION', defaultValue: '3.9.0', description: 'SemVer version of Helm to be used.') string(name: 'KIND_VERSION', defaultValue: '0.14.0', description: 'SemVer version of Kind to be used.') @@ -84,7 +84,6 @@ pipeline { KIBANA_VERSION = "${params.KIBANA_VERSION.trim()}" STACK_VERSION = "${params.STACK_VERSION.trim()}" FORCE_SKIP_GIT_CHECKS = "${params.forceSkipGitChecks}" - HELM_CHART_VERSION = "${params.HELM_CHART_VERSION.trim()}" HELM_VERSION = "${params.HELM_VERSION.trim()}" KIND_VERSION = "${params.KIND_VERSION.trim()}" KUBERNETES_VERSION = "${params.KUBERNETES_VERSION.trim()}" @@ -157,17 +156,9 @@ pipeline { "STACK_INSTANCE_ID=${env.BUILD_URL}_stack", "TAGS=non-existing-tag" ]) { - ciBuild() { - sh(label: 'Create Stack node', script: "make -C .ci provision-stack") - } ciBuild() { retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Setup Stack node', script: "make -C .ci setup-stack") - } - } - ciBuild() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Start Elastic Stack', script: "make -C .ci start-elastic-stack") + sh(label: 'Setup Stack node', script: "make -C .ci create-stack") } } } @@ -221,15 +212,27 @@ pipeline { cleanup { // Once all tests are complete we need to teardown the single instance with the deployed stack script { + dir("${env.REAL_BASE_DIR}") { + ciBuild() { + def stackIP = getNodeIp('stack') + sh(label: 'Grab logs', script:"make -C .ci fetch-test-reports NODE_IP_ADDRESS=${stackIP} NODE_LABEL=debian_10_amd64") + archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") + junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") + } + } def stackMachine = getMachineInfo('stack') if (!params.DESTROY_CLOUD_RESOURCES) { def stackRunnerIP = getNodeIp('stack') log(level: 'DEBUG', text: "Stack instance won't be destroyed after the build. 
Please SSH into the stack machine on ${stackRunnerIP}") } else { dir("${env.REAL_BASE_DIR}") { - ciBuild() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Destroy stack node', script: "make -C .ci destroy-stack") + withEnv([ + "STACK_INSTANCE_ID=${env.BUILD_URL}_stack", + ]) { + ciBuild() { + retryWithSleep(retries: 3, seconds: 5, backoff: true){ + sh(label: 'Destroy stack node', script: "make -C .ci destroy-stack") + } } } } @@ -278,7 +281,7 @@ def checkSkipTests() { } // patterns for all places that should trigger a full build - def regexps = [ "^e2e/_suites/fleet/.*", "^e2e/_suites/helm/.*", "^e2e/_suites/kubernetes-autodiscover/.*", "^.ci/.*", "^cli/.*", "^e2e/.*\\.go", "^internal/.*\\.go" ] + def regexps = [ "^e2e/_suites/fleet/.*", "^e2e/_suites/kubernetes-autodiscover/.*", "^.ci/.*", "^cli/.*", "^e2e/.*\\.go", "^internal/.*\\.go" ] setEnvVar("SKIP_TESTS", !isGitRegionMatch(patterns: regexps, shouldMatchAll: false)) } } @@ -302,9 +305,6 @@ def ciBuild(Closure body){ [var: "AWS_SECRET_ACCESS_KEY", password: awsAuthObj.secret_key] ]) { withOtelEnv() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh("make -C .ci setup-env") // make sure the environment is created - } body() } } @@ -543,21 +543,8 @@ def generateFunctionalTestStep(Map args = [:]){ } } } - withEnv([ - "ARCHITECTURE=${goArch}", - "CUCUMBER_REPORTS_PATH=${env.REAL_BASE_DIR}/outputs/${testRunnerIP}", - "PLATFORM=${platform}", - "SUITE=${suite}", - "TAGS=${tags}", - ]){ - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - dockerLogin(secret: "${DOCKER_ELASTIC_SECRET}", registry: "${DOCKER_REGISTRY}") - sh(script: ".ci/scripts/generate-cucumber-reports.sh", label: "generate-cucumber-reports.sh") - } - } - junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/${testRunnerIP}/TEST-*.xml") - archiveArtifacts allowEmptyArchive: true, - artifacts: "outputs/${testRunnerIP}/TEST-*.xml, outputs/${testRunnerIP}/TEST-*.json, outputs/${testRunnerIP}/TEST-*.json.html" + archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") + junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") } } } diff --git a/.ci/Makefile b/.ci/Makefile index dbc8dab94a..4f2d618eff 100644 --- a/.ci/Makefile +++ b/.ci/Makefile @@ -24,7 +24,7 @@ STACK_INSTANCE_ID ?= stack_$(STACK_LABEL)_$(RUN_ID) # Debian 10 AMD (see .e2e-platforms.yaml) NODE_IMAGE ?= ami-0d90bed76900e679a NODE_INSTANCE_TYPE ?= t3.xlarge -NODE_LABEL ?= debian_amd64 +NODE_LABEL ?= debian_10_amd64 NODE_SHELL_TYPE ?= sh NODE_USER ?= admin NODE_IP_ADDRESS ?= $(shell cat $(PROJECT_DIR)/.ci/.node-host-ip) @@ -41,6 +41,12 @@ SUITE ?= fleet # Tags to run. Please check out the feature files TAGS ?= fleet_mode +SHELL = /bin/bash +MAKEFLAGS += --silent --no-print-directory +.SHELLFLAGS = -ec + +export ANSIBLE_CONFIG := $(CURDIR)/ansible/ansible.cfg + # Check that given variables are set and all have non-empty values, # die with an error otherwise. 
# @@ -61,6 +67,21 @@ __check_defined = \ .runID: echo "$(shell uuidgen|cut -d'-' -f1)" > $(RUN_ID_FILE) +.PHONY: show-env +show-env: + @source $(VENV_DIR)/bin/activate; \ + echo "PROJECT_DIR: $(PROJECT_DIR)"; \ + echo "VENV_DIR: $(VENV_DIR)"; \ + echo "VENV_BIN_DIR: $(VENV_BIN_DIR)"; \ + echo "ANSIBLE: $$(ansible --version)"; \ + echo "Python: $$(python --version)"; \ + echo "RUN_ID: $(RUN_ID)"; \ + echo "STACK_VERSION: $(STACK_VERSION)"; \ + echo "SUITE: $(SUITE)"; \ + echo "TAGS: $(TAGS)"; \ + echo "PROVIDER: $(PROVIDER)"; + echo "NODE_LABEL: $(NODE_LABEL)"; + .PHONY: setup-env setup-env: $(MAKE) .runID @@ -74,17 +95,18 @@ destroy-env: rm -fr $(VENV_DIR) .PHONY: list-platforms -list-platforms: +list-platforms: @docker run --rm -i -w "/workdir" -v $(PROJECT_DIR)/.ci:/workdir mikefarah/yq:4 ".PLATFORMS | keys" .e2e-platforms.yaml # Create the env file for the target platform .PHONY: set-env-% set-env-%: - @$(PROJECT_DIR)/.ci/scripts/yq.sh "$*" + @$(PROJECT_DIR)/.ci/scripts/gen-platform-env-file.py "$*" .PHONY: provision-stack -provision-stack: .runID +provision-stack: setup-env show-env @:$(call check_defined, RUN_ID, You need to an unique RUN_ID. To create it please run 'make .runID' goal) + source $(VENV_DIR)/bin/activate; \ $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE) nodeUser=$(STACK_USER)" \ @@ -94,9 +116,10 @@ provision-stack: .runID .PHONY: setup-stack setup-stack: export TAGS = non-existing-tag -setup-stack: .runID +setup-stack: setup-env show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) @:$(call check_defined, STACK_IP_ADDRESS, IP address of the stack not defined) + source $(VENV_DIR)/bin/activate; \ $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE) nodeUser=$(STACK_USER)" \ @@ -109,15 +132,15 @@ setup-stack: .runID create-stack: provision-stack setup-stack start-elastic-stack .PHONY: destroy-stack -destroy-stack: +destroy-stack: setup-env show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) - $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ + source $(VENV_DIR)/bin/activate; \ + $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/teardown.yml \ --private-key="$(SSH_KEY)" \ --extra-vars="$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeUser=$(STACK_IMAGE)" \ --extra-vars="runId=$(RUN_ID) instanceID=$(STACK_INSTANCE_ID) nodeShellType=$(STACK_SHELL_TYPE) workspace=$(PROJECT_DIR)/ sshPublicKey=$(SSH_KEY_PUBLIC)" \ - --ssh-common-args='$(SSH_OPTS)' \ - -t destroy - rm -fr $(PROJECT_DIR)/.ci/.stack-host-ip + --ssh-common-args='$(SSH_OPTS)' + # rm -fr $(PROJECT_DIR)/.ci/.stack-host-ip .PHONY: ssh-stack ssh-stack: @@ -132,9 +155,10 @@ show-stack: @echo "Stack Shell : $(STACK_SHELL_TYPE)" .PHONY: provision-node -provision-node: .runID set-env-$(NODE_LABEL) +provision-node: setup-env set-env-$(NODE_LABEL) @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) @:$(call check_defined, STACK_IP_ADDRESS, IP address of the stack not defined) + source $(VENV_DIR)/bin/activate; \ . 
$(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ @@ -143,11 +167,11 @@ provision-node: .runID set-env-$(NODE_LABEL) -t provision-node .PHONY: setup-node -setup-node: .runID set-env-$(NODE_LABEL) +setup-node: setup-env set-env-$(NODE_LABEL) @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) @:$(call check_defined, STACK_IP_ADDRESS, IP address of the stack not defined) @:$(call check_defined, NODE_IP_ADDRESS, IP address of the test node not defined) - source $(VENV)/bin/activate; \ + source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ @@ -160,8 +184,9 @@ setup-node: .runID set-env-$(NODE_LABEL) create-node: provision-node setup-node .PHONY: destroy-node -destroy-node: set-env-$(NODE_LABEL) +destroy-node: setup-env set-env-$(NODE_LABEL) @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) + source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/teardown.yml \ --private-key="$(SSH_KEY)" \ --extra-vars="$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeUser=$${NODE_USER}" \ @@ -170,9 +195,10 @@ destroy-node: set-env-$(NODE_LABEL) rm -fr $(PROJECT_DIR)/.ci/.node-host-ip .PHONY: fetch-test-reports -fetch-test-reports: .runID set-env-$(NODE_LABEL) +fetch-test-reports: setup-env set-env-$(NODE_LABEL) @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) @:$(call check_defined, NODE_IP_ADDRESS, IP address of the test node not defined) + source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/fetch-test-reports.yml \ --private-key="$(SSH_KEY)" \ --extra-vars "$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ @@ -199,8 +225,9 @@ destroy-elastic-stack: ssh $(SSH_OPTS_EXTENDED) -i $(SSH_KEY) $(STACK_USER)@$(STACK_IP_ADDRESS) 'sudo docker-compose -f /root/.op/compose/profiles/fleet/docker-compose.yml down --remove-orphans' .PHONY: start-elastic-stack -start-elastic-stack: +start-elastic-stack: setup-env show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) + source $(VENV_DIR)/bin/activate; \ PROVIDER="remote" SUITE="$(SUITE)" TAGS="non-existent-tag" \ $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/elastic-stack.yml \ --private-key="$(SSH_KEY)" \ @@ -217,8 +244,9 @@ recreate-fleet-server: $(MAKE) start-elastic-stack .PHONY: run-tests -run-tests: set-env-$(NODE_LABEL) +run-tests: setup-env set-env-$(NODE_LABEL) show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) + source $(VENV_DIR)/bin/activate; \ . 
$(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && PROVIDER="$(PROVIDER)" SUITE="$(SUITE)" TAGS="$(TAGS)" REPORT_PREFIX="$(SUITE)_$${NODE_LABEL}_$(TAGS)" \
 $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml \
 --private-key="$(SSH_KEY)" \
@@ -230,8 +258,9 @@ run-tests: set-env-$(NODE_LABEL)
 . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && ssh $(SSH_OPTS_EXTENDED) -i $(SSH_KEY) $${NODE_USER}@$(NODE_IP_ADDRESS) "sudo bash /home/$${NODE_USER}/e2e-testing/.ci/scripts/functional-test.sh \"$(TAGS)\""

 .PHONY: run-tests-win
-run-tests-win: set-env-$(NODE_LABEL)
+run-tests-win: setup-env set-env-$(NODE_LABEL) show-env
 @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal)
+ source $(VENV_DIR)/bin/activate; \
 . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && PROVIDER="$(PROVIDER)" SUITE="$(SUITE)" TAGS="$(TAGS)" REPORT_PREFIX="$(SUITE)_$${NODE_LABEL}_$(TAGS)" \
 $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml \
 --private-key="$(SSH_KEY)" \
@@ -241,3 +270,7 @@ run-tests-win: set-env-$(NODE_LABEL)
 -t run-tests \
 -i $(NODE_IP_ADDRESS),
 . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && ssh $(SSH_OPTS_EXTENDED) -i $(SSH_KEY) $${NODE_USER}@$(NODE_IP_ADDRESS) "powershell \"C:/Users/$${NODE_USER}/e2e-testing/.ci/scripts/functional-test.ps1\""
+
+.PHONY: clean
+clean:
+ rm -fr "$(PROJECT_DIR)/.ci/".env-* "$(PROJECT_DIR)/.ci/.node-host-ip" "$(PROJECT_DIR)/.ci/.runID" "$(PROJECT_DIR)/.ci/.stack-host-ip" "$(PROJECT_DIR)/outputs" "$(PROJECT_DIR)/None-sshhosts" "$(PROJECT_DIR)/stack-sshhosts"
diff --git a/.ci/README.md b/.ci/README.md
index 0cea48cc8a..44a7ef4648 100644
--- a/.ci/README.md
+++ b/.ci/README.md
@@ -18,7 +18,7 @@ In order to configure each platform, there is an `Ansible` script that installs
 It's possible that a consumer of the e2e tests would need to define a specific layout for the test execution, adding or removing suites and/or scenarios. That's the case for Beats or the Elastic Agent, which triggers the E2E tests with a different layout than the one used when developing the test framework itself: while in Beats or the Elastic Agent we are more interested in running the tests for Fleet only, when developing the project we want to verify all the test suites at once. The structure of these files is the following (a minimal example is shown after this list):
 - **SUITES**: this entry will hold a YAML object containing a list of suites. Each suite in the list will be represented by a YAML object with the following attributes:
- - **suite**: the name of the suite. Will be used to look up the root directory of the test suite, located under the `e2e/_suites` directory. Therefore, only `fleet`, `helm` and `kubernetes-autodiscover` are valid values. Required.
+ - **suite**: the name of the suite. Will be used to look up the root directory of the test suite, located under the `e2e/_suites` directory. Therefore, only `fleet` and `kubernetes-autodiscover` are valid values. Required.
 - **provider**: declares the provider type for the test suite. Valid values are `docker`, `elastic-package` and `remote`. If not present, it will use `remote` as fallback. Optional.
 - **scenarios**: a list of YAML objects representing the test scenarios, where the tests are executed. A test scenario will basically declare how to run a set of tests, using the following attributes:
 - **name**: name of the test scenario. It will be used by Jenkins to name the parallel stage representing this scenario. Required.
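For illustration, here is a minimal sketch of such a file, built only from values that already appear in this patch (the `fleet` suite, the `Fleet` scenario, the `fleet_mode` tag and the `debian_10_amd64` platform); treat it as an example of the schema rather than a definitive configuration:

```yaml
---
SUITES:
  - suite: "fleet"
    provider: "remote"  # optional: "remote" is the fallback when omitted
    scenarios:
      - name: "Fleet"
        tags: "fleet_mode"
        platforms: ["debian_10_amd64"]
```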
@@ -114,6 +114,10 @@ A `.stack-host-ip` file will be created in the `.ci` directory of the project in
 Please remember to [destroy the stack](#destroying-the-stack-and-the-test-nodes) once you have finished your testing.
+> You probably need to run the `start-elastic-stack` command twice: the Fleet Server could try to start faster than Kibana and die. Running the command again will recreate the container for Fleet Server.
+
+> The `recreate-fleet-server` command has been deprecated, and calls the `start-elastic-stack` command instead.
+
 ### Create and configure the test node
 There are different VM flavours that you can use to run the Elastic Agent and enroll it into the Stack: Debian, CentOS, SLES15, Oracle Linux... using AMD and ARM architectures. You can find the full reference of the platform support [here](https://github.com/elastic/e2e-testing/blob/4517dfa134844f720139d6bab3955cc8d9c6685c/.ci/.e2e-platforms.yaml#L2-L42).
@@ -165,12 +169,11 @@ $ env | grep NODE
 NODE_LABEL=centos8_arm64
 ```
-Besides that, it's possible to configure the test node for the different test suites that are present in the test framework: `fleet`, `helm` and `kubernetes-autodiscover`. Please configure the test node setting the suite, being `fleet` the default:
+Besides that, it's possible to configure the test node for the different test suites that are present in the test framework: `fleet` and `kubernetes-autodiscover`. Configure the test node by setting the suite; `fleet` is the default:
 ```shell
 # all possible suites
 export SUITE="fleet"
-export SUITE="helm"
 export SUITE="kubernetes-autodiscover"
 ```
@@ -198,18 +201,6 @@ A `.node-host-ip` file will be created in the `.ci` directory of the project inc
 Please remember to [destroy the node](#destroying-the-stack-and-the-test-nodes) once you have finished your testing.
-Finally, start the stack:
-
-```shell
-export SSH_KEY="PATH_TO_YOUR_SSH_KEY_WITH_ACCESS_TO_AWS"
-export SUITE="fleet"
-make -C .ci start-elastic-stack
-```
-
-> You probably need to run this command twice: the Fleet Server could try to start faster than Kibana and die. Running the command again will recreate the container for Fleet Server.
-
-> The `recreate-fleet-server` command has been deprecated, and calls the `start-elastic-stack` command instead.
-
 ### Run a test suite
 You can select the specific tags that you want to include in the test execution. Please look for the different tags in the existing feature files for the suite you are interested in running. For that, please check out the tags/annotations that are present in those feature files (`*.feature`), which live in the `features` directory under your test suite. For example, for the `fleet` test suite, you can find them [here](../e2e/_suites/fleet/features/).
@@ -347,3 +338,127 @@ Make sure :
 - Check 600 permission is provided to id_rsa key files.
 - Run the `list-platforms` command and export the Node variable to resolve node creation errors.
 - While creating a Windows node, we need to run the `create-node` command in portions, such as `provision-node` and `setup-node`. Also, sometimes you need to SSH into the node to create it successfully.
+
+### ERROR! couldn't resolve module/action 'ec2'
+
+This is caused by problems resolving the Galaxy collections you have installed: a user-level collection can shadow the version the project needs; see the sketch and the example listing below.
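One possible cleanup, assuming the user-level collections under `~/.ansible/collections` are safe to remove on your machine, is to delete them so that the versions pinned by `.ci/ansible/requirements.txt` inside the project's virtualenv are the ones Ansible resolves:

```bash
# Assumption: nothing else on this machine depends on the user-level collections.
# Remove them so Ansible falls back to the collections bundled with the
# project's virtualenv, which are pinned by .ci/ansible/requirements.txt.
rm -rf ~/.ansible/collections

# Reinstall the pinned dependencies inside the virtualenv.
source .venv/bin/activate
pip install -r .ci/ansible/requirements.txt
```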
+This is an example that causes the issue we have `community.general 4.3.0` in our user collection and the project uses `community.general 5.8.3` + +```bash +> ansible-galaxy collection list +# /Users/myuser/.ansible/collections/ansible_collections +Collection Version +----------------- ------- +amazon.aws 5.1.0 +community.general 4.3.0 + +# /Users/myuser/src/e2e-testing/.venv/lib/python3.8/site-packages/ansible_collections +Collection Version +----------------------------- ------- +amazon.aws 3.5.0 +ansible.netcommon 3.1.3 +ansible.posix 1.4.0 +ansible.utils 2.8.0 +ansible.windows 1.12.0 +arista.eos 5.0.1 +awx.awx 21.10.0 +azure.azcollection 1.14.0 +check_point.mgmt 2.3.0 +chocolatey.chocolatey 1.3.1 +cisco.aci 2.3.0 +cisco.asa 3.1.0 +cisco.dnac 6.6.1 +cisco.intersight 1.0.22 +cisco.ios 3.3.2 +cisco.iosxr 3.3.1 +cisco.ise 2.5.9 +cisco.meraki 2.13.0 +cisco.mso 2.1.0 +cisco.nso 1.0.3 +cisco.nxos 3.2.0 +cisco.ucs 1.8.0 +cloud.common 2.1.2 +cloudscale_ch.cloud 2.2.3 +community.aws 3.6.0 +community.azure 1.1.0 +community.ciscosmb 1.0.5 +community.crypto 2.9.0 +community.digitalocean 1.22.0 +community.dns 2.4.2 +community.docker 2.7.3 +community.fortios 1.0.0 +community.general 5.8.3 +community.google 1.0.0 +community.grafana 1.5.3 +community.hashi_vault 3.4.0 +community.hrobot 1.6.0 +community.libvirt 1.2.0 +community.mongodb 1.4.2 +community.mysql 3.5.1 +community.network 4.0.2 +community.okd 2.2.0 +community.postgresql 2.3.1 +community.proxysql 1.4.0 +community.rabbitmq 1.2.3 +community.routeros 2.5.0 +community.sap 1.0.0 +community.sap_libs 1.4.0 +community.skydive 1.0.0 +community.sops 1.5.0 +community.vmware 2.10.2 +community.windows 1.11.1 +community.zabbix 1.9.0 +containers.podman 1.10.1 +cyberark.conjur 1.2.0 +cyberark.pas 1.0.14 +dellemc.enterprise_sonic 1.1.2 +dellemc.openmanage 5.5.0 +dellemc.os10 1.1.1 +dellemc.os6 1.0.7 +dellemc.os9 1.0.4 +f5networks.f5_modules 1.21.0 +fortinet.fortimanager 2.1.7 +fortinet.fortios 2.2.1 +frr.frr 2.0.0 +gluster.gluster 1.0.2 +google.cloud 1.0.2 +hetzner.hcloud 1.9.0 +hpe.nimble 1.1.4 +ibm.qradar 2.1.0 +ibm.spectrum_virtualize 1.10.0 +infinidat.infinibox 1.3.12 +infoblox.nios_modules 1.4.1 +inspur.ispim 1.2.0 +inspur.sm 2.3.0 +junipernetworks.junos 3.1.0 +kubernetes.core 2.3.2 +lowlydba.sqlserver 1.2.0 +mellanox.onyx 1.0.0 +netapp.aws 21.7.0 +netapp.azure 21.10.0 +netapp.cloudmanager 21.21.0 +netapp.elementsw 21.7.0 +netapp.ontap 21.24.1 +netapp.storagegrid 21.11.1 +netapp.um_info 21.8.0 +netapp_eseries.santricity 1.3.1 +netbox.netbox 3.9.0 +ngine_io.cloudstack 2.3.0 +ngine_io.exoscale 1.0.0 +ngine_io.vultr 1.1.2 +openstack.cloud 1.10.0 +openvswitch.openvswitch 2.1.0 +ovirt.ovirt 2.4.1 +purestorage.flasharray 1.15.0 +purestorage.flashblade 1.10.0 +purestorage.fusion 1.2.0 +sensu.sensu_go 1.13.1 +servicenow.servicenow 1.0.6 +splunk.es 2.1.0 +t_systems_mms.icinga_director 1.31.4 +theforeman.foreman 3.7.0 +vmware.vmware_rest 2.2.0 +vultr.cloud 1.3.1 +vyos.vyos 3.0.1 +wti.remote 1.0.4 +``` diff --git a/.ci/ansible/ansible.cfg b/.ci/ansible/ansible.cfg index f394c17c0b..2c13ef598c 100644 --- a/.ci/ansible/ansible.cfg +++ b/.ci/ansible/ansible.cfg @@ -1,2 +1,11 @@ [defaults] -callbacks_enabled = community.general.opentelemetry +executable = /bin/bash +module_lang = en_US.UTF-8 +force_color = true +stdout_callback = default +# callbacks_enabled = community.general.opentelemetry +localhost_warning = false +pretty_results = true +result_format = yaml +show_custom_stats = true +show_task_path_on_failure = true diff --git a/.ci/ansible/elastic-stack.yml 
b/.ci/ansible/elastic-stack.yml index bc738bd68c..1fc74f9a16 100644 --- a/.ci/ansible/elastic-stack.yml +++ b/.ci/ansible/elastic-stack.yml @@ -55,6 +55,11 @@ ansible.builtin.shell: args: cmd: "TAGS='non-existing-tag' {{ e2e_base_dir }}.ci/scripts/functional-test.sh" + register: _result + # FIXME workaround to avoid fail when the token expires + retries: 2 + delay: 10 + until: _result is succeeded - name: Wait for the stack to come up wait_for: host={{ inventory_hostname }} port={{ item.port }} delay=10 timeout=60 diff --git a/.ci/ansible/fetch-test-reports.yml b/.ci/ansible/fetch-test-reports.yml index 95aa7b634a..36d4e2e60e 100644 --- a/.ci/ansible/fetch-test-reports.yml +++ b/.ci/ansible/fetch-test-reports.yml @@ -19,7 +19,7 @@ paths: - 'group_vars' vars: - ansible_python_interpreter: "auto" + ansible_python_interpreter: "python3" ansible_shell_type: "{{ nodeShellType | default('sh') }}" ansible_user: "{{ nodeUser }}" pip_package: "python3-pip" diff --git a/.ci/ansible/playbook.yml b/.ci/ansible/playbook.yml index f392784d02..5aceb19dcc 100644 --- a/.ci/ansible/playbook.yml +++ b/.ci/ansible/playbook.yml @@ -138,7 +138,7 @@ when: - ansible_facts['os_family'] != "Windows" - suite is defined - - suite in ["kubernetes-autodiscover", "helm"] + - suite in ["kubernetes-autodiscover"] - role: mdelapenya.go become: True when: ansible_facts['os_family'] != "Windows" @@ -167,7 +167,7 @@ when: - ansible_facts['os_family'] != "Windows" - suite is defined - - suite in ["kubernetes-autodiscover", "helm"] + - suite in ["kubernetes-autodiscover"] - name: Setup source code include_tasks: tasks/copy_test_files.yml diff --git a/.ci/ansible/requirements.txt b/.ci/ansible/requirements.txt index be0d348bcd..53f2588fd1 100644 --- a/.ci/ansible/requirements.txt +++ b/.ci/ansible/requirements.txt @@ -1,9 +1,36 @@ -###### Requirements without Version Specifiers ###### -ansible -boto -boto3 -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk +ansible==6.7.0 +ansible-core==2.13.7 +backoff==2.2.1 +boto==2.49.0 +boto3==1.26.29 +botocore==1.29.29 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==2.1.1 +cryptography==38.0.4 +Deprecated==1.2.13 +googleapis-common-protos==1.56.4 +grpcio==1.51.1 +idna==3.4 +Jinja2==3.1.2 +jmespath==1.0.1 +MarkupSafe==2.1.1 +opentelemetry-api==1.15.0 +opentelemetry-exporter-otlp==1.15.0 +opentelemetry-exporter-otlp-proto-grpc==1.15.0 +opentelemetry-exporter-otlp-proto-http==1.15.0 +opentelemetry-proto==1.15.0 +opentelemetry-sdk==1.15.0 +opentelemetry-semantic-conventions==0.36b0 +packaging==22.0 protobuf==3.20.1 #Temporary change because of protobuf new version bug: https://github.com/protocolbuffers/protobuf/issues/10051 -requests +pycparser==2.21 +python-dateutil==2.8.2 +PyYAML==6.0 +requests==2.28.1 +resolvelib==0.8.1 +s3transfer==0.6.0 +six==1.16.0 +typing_extensions==4.4.0 +urllib3==1.26.13 +wrapt==1.14.1 diff --git a/.ci/ansible/tasks/copy_test_files.yml b/.ci/ansible/tasks/copy_test_files.yml index 4cccfa8c04..d64693ac9e 100644 --- a/.ci/ansible/tasks/copy_test_files.yml +++ b/.ci/ansible/tasks/copy_test_files.yml @@ -27,3 +27,4 @@ delegate_to: "localhost" vars: ansible_shell_type: "sh" # because the rsync is executed locally, we need to use the current + ansible_python_interpreter: "python3" diff --git a/.ci/ansible/tasks/fetch_test_reports.yml b/.ci/ansible/tasks/fetch_test_reports.yml index 771a08a743..fce68f99e4 100644 --- a/.ci/ansible/tasks/fetch_test_reports.yml +++ b/.ci/ansible/tasks/fetch_test_reports.yml @@ -1,13 +1,31 @@ --- +- name: Fetch Test logs 
+ become: false + environment: + PATH: "/home/admin/bin:/home/admin/go/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/go/bin" + ansible.builtin.shell: + cmd: | + ([ -d ./kubernetes-autodiscover ] && tar -czf "kubernetes-autodiscover-logs.tgz" "./kubernetes-autodiscover") || true + ([ -d ./docker-logs ] && tar -czf "docker-logs.tgz" "./docker-logs") || true + ([ -d ./fleet ] tar -czf "fleet-logs.tgz" "./fleet") || true + chdir: "{{ e2e_base_dir }}outputs" + tags: + - fetch-reports + when: + - ansible_facts['os_family'] != "Windows" + - name: Find the Test reports to copy/fetch - become: no - ansible.builtin.find: + become: false + ansible.builtin.find: paths: "{{ e2e_base_dir }}outputs" file_type: file - use_regex: yes + use_regex: yes + recurse: yes patterns: - - '^TEST.*json$' - - '^TEST.*xml$' + - "^TEST.*json$" + - "^TEST.*xml$" + - "^.*tgz$" + - "^.*zip$" register: files_2_fetch tags: - fetch-reports @@ -15,14 +33,17 @@ - ansible_facts['os_family'] != "Windows" - name: Find the Test reports to copy/fetch (Windows) - become: no - ansible.windows.win_find: + become: false + ansible.windows.win_find: paths: "{{ e2e_base_dir }}outputs" file_type: file - use_regex: yes + use_regex: yes + recurse: yes patterns: - - '^TEST.*json$' - - '^TEST.*xml$' + - "^TEST.*json$" + - "^TEST.*xml$" + - "^.*tgz$" + - "^.*zip$" register: files_2_fetch_win tags: - fetch-reports @@ -30,7 +51,7 @@ - ansible_facts['os_family'] == "Windows" - name: Create local directory - become: no + become: false vars: ansible_shell_type: "sh" ansible.builtin.file: @@ -40,11 +61,24 @@ tags: - fetch-reports +- name: Set report folder name + become: false + vars: + _prefix: "{{ lookup('env', 'REPORT_PREFIX') | default('stack') | replace(' ', '_') }}" + _suite: "{{ lookup('env', 'SUITE') | default('') | replace(' ', '_') }}" + _arch: "{{ lookup('env', 'GOARCH') | default('') }}" + _node_label: "{{ lookup('env', 'NODE_LABEL') | default('') }}" + _tags: "{{ lookup('env', 'TAGS') | default('none') | replace(' ', '_') | replace('@', '') | replace('~', '') | replace('&', '') | replace('|', '')}}" + set_fact: + report_folder: "{{ workspace }}outputs/{{ _prefix }}{{ _suite }}-{{ _node_label }}{{ _arch }}-{{ _tags }}" + tags: + - fetch-reports + - name: Fetch the Test reports - become: no + become: false ansible.builtin.fetch: src: "{{ item.path }}" - dest: "{{ workspace }}outputs/{{ inventory_hostname }}/" + dest: "{{ report_folder }}/" flat: yes fail_on_missing: no with_items: "{{ files_2_fetch.files }}" @@ -54,10 +88,10 @@ - ansible_facts['os_family'] != "Windows" - name: Fetch the Test reports (Windows) - become: no + become: false ansible.builtin.fetch: src: "{{ item.path }}" - dest: "{{ workspace }}outputs/{{ inventory_hostname }}/" + dest: "{{ report_folder }}/" flat: yes fail_on_missing: no with_items: "{{ files_2_fetch_win.files }}" diff --git a/.ci/ansible/tasks/runners.yml b/.ci/ansible/tasks/runners.yml index 5ad6095310..9ae89ba517 100644 --- a/.ci/ansible/tasks/runners.yml +++ b/.ci/ansible/tasks/runners.yml @@ -14,19 +14,27 @@ git_sha: "{{ gitSha | default('Not running on CI')}}" repo: "{{repo | default('Not running on CI')}}" run_id: "{{runId | default('Not running on CI')}}" + division: engineering + org: obs + team: eng-productivity + project: e2e tags: - provision-stack - provision-node - name: "Create {{nodeLabel}} AWS instances" - ec2: - wait: yes + ec2_instance: + state: started + wait: true + name: "e2e-{{ instanceID}}" key_name: "e2essh-{{runId}}" region: us-east-2 - group: e2e - image: '{{nodeImage}}' + 
security_group: e2e + image_id: '{{nodeImage}}' instance_type: '{{nodeInstanceType}}' - instance_tags: + network: + assign_public_ip: true + tags: branch: "{{branch | default('Not running on CI')}}" build: "{{build | default('Not running on CI')}}" build_url: "{{buildURL | default('Not running on CI') }}" @@ -40,30 +48,35 @@ reaper_mark: "e2e-testing-vm" repo: "{{repo | default('Not running on CI')}}" run_id: "{{runId | default('Not running on CI')}}" - count_tag: - name: "e2e-{{ instanceID }}" + division: engineering + org: obs + team: eng-productivity + project: e2e volumes: - device_name: /dev/sda1 - volume_type: gp3 - volume_size: "{{ (nodeLabel != 'windows2019') | ternary(15, 60) }}" - delete_on_termination: yes + ebs: + volume_type: gp3 + volume_size: "{{ (nodeLabel != 'windows2019') | ternary(15, 60) }}" + delete_on_termination: true - device_name: /dev/xvda - volume_type: gp3 - volume_size: 200 - delete_on_termination: yes - ebs_optimized: yes + ebs: + volume_type: gp3 + volume_size: 200 + delete_on_termination: true + ebs_optimized: true register: ec2 tags: - provision-stack - provision-node - name: Add AWS host to ssh address list + no_log: true lineinfile: state: present - line: "- {{ nodeUser }}@{{addr.public_ip}}" + line: "- {{ nodeUser }}@{{addr.public_ip_address}}" insertafter: EOF dest: "{{ workspace }}{{nodeLabel}}-sshhosts" - create: yes + create: true loop: "{{ ec2.instances }}" loop_control: loop_var: addr @@ -74,7 +87,7 @@ - name: Add stack AWS host to ssh address list copy: dest: "{{ workspace }}.ci/.stack-host-ip" - content: "{{addr.public_ip}}" + content: "{{addr.public_ip_address}}" force: true loop: "{{ ec2.instances }}" loop_control: @@ -85,7 +98,7 @@ - name: Add node AWS host to ssh address list copy: dest: "{{ workspace }}.ci/.node-host-ip" - content: "{{addr.public_ip}}" + content: "{{addr.public_ip_address}}" force: true loop: "{{ ec2.instances }}" loop_control: @@ -94,7 +107,7 @@ - provision-node - name: Wait for SSH to come up - wait_for: host={{ nodeItem.public_ip }} port=22 delay=10 + wait_for: host={{ nodeItem.public_ip_address }} port=22 delay=10 loop: "{{ ec2.instances }}" loop_control: loop_var: nodeItem diff --git a/.ci/ansible/tasks/setup_test_script.yml b/.ci/ansible/tasks/setup_test_script.yml index 62a26f39cf..9e3d7532ef 100644 --- a/.ci/ansible/tasks/setup_test_script.yml +++ b/.ci/ansible/tasks/setup_test_script.yml @@ -5,6 +5,7 @@ state: absent - name: Extend environment for Stack Bootstrapping + no_log: true lineinfile: state: present line: "{{ item }}" @@ -26,6 +27,7 @@ - scripts - name: Extend environment for Fleet testing + no_log: true lineinfile: state: present line: "{{item}}" @@ -35,9 +37,9 @@ with_items: - "SUITE=\"{{ lookup('env', 'SUITE') or 'fleet' }}\"" - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'remote' }}\"" - - "ELASTICSEARCH_PASSWORD=\"changeme\"" - - "KIBANA_PASSWORD=\"changeme\"" - - "SKIP_PULL=\"1\"" + - "ELASTICSEARCH_PASSWORD=\"{{ lookup('env', 'ELASTICSEARCH_PASSWORD') or 'changeme' }}\"" + - "KIBANA_PASSWORD=\"{{ lookup('env', 'KIBANA_PASSWORD') or 'changeme' }}\"" + - "SKIP_PULL=\"{{ lookup('env', 'SKIP_PULL') or '1' }}\"" - "DEVELOPER_MODE=\"{{ lookup('env', 'DEVELOPER_MODE') or 'false' }}\"" when: - suite is defined @@ -47,6 +49,7 @@ - scripts - name: Extend environment for Remote provider + no_log: true lineinfile: state: present line: "{{ item }}" @@ -64,21 +67,27 @@ tags: - scripts -- name: Extend environment for Fleet with elastic-package testing +- name: Extend environment for Remote provider + no_log: true 
lineinfile: state: present - line: "{{item}}" + line: "{{ item }}" insertafter: EOF dest: "{{ e2e_home_dir }}.env" create: yes with_items: - - "SUITE=\"{{ lookup('env', 'SUITE') or 'fleet' }}\"" - - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'elastic-package' }}\"" - when: "'fleet_elastic_pkg' in nodeLabel" + - "ELASTICSEARCH_URL=\"{{ lookup('env', 'ELASTICSEARCH_URL') }}\"" + - "KIBANA_URL=\"{{ lookup('env', 'KIBANA_URL')}}\"" + - "FLEET_URL=\"{{ lookup('env', 'FLEET_URL')}}\"" + when: + - suite is defined + - lookup('env', 'ELASTICSEARCH_URL') != '' + - lookup('env', 'PROVIDER') == 'remote' tags: - scripts -- name: Extend environment for Kubernetes Autodiscover testing +- name: Extend environment for Fleet with elastic-package testing + no_log: true lineinfile: state: present line: "{{item}}" @@ -86,18 +95,14 @@ dest: "{{ e2e_home_dir }}.env" create: yes with_items: - - "SUITE=\"{{ lookup('env', 'SUITE') or 'kubernetes-autodiscover' }}\"" - - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'docker' }}\"" - - "KIND_VERSION=\"{{ lookup('env', 'KIND_VERSION') }}\"" - - "KUBERNETES_VERSION=\"{{ lookup('env', 'KUBERNETES_VERSION') }}\"" - - "DEVELOPER_MODE=\"{{ lookup('env', 'DEVELOPER_MODE') or 'false' }}\"" - when: - - suite is defined - - suite == "kubernetes-autodiscover" + - "SUITE=\"{{ lookup('env', 'SUITE') or 'fleet' }}\"" + - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'elastic-package' }}\"" + when: "'fleet_elastic_pkg' in nodeLabel" tags: - scripts -- name: Extend environment for Helm testing +- name: Extend environment for Kubernetes Autodiscover testing + no_log: true lineinfile: state: present line: "{{item}}" @@ -105,20 +110,19 @@ dest: "{{ e2e_home_dir }}.env" create: yes with_items: - - "SUITE=\"{{ lookup('env', 'SUITE') or 'helm' }}\"" + - "SUITE=\"{{ lookup('env', 'SUITE') or 'kubernetes-autodiscover' }}\"" - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'docker' }}\"" - - "HELM_CHART_VERSION=\"{{ lookup('env', 'HELM_CHART_VERSION') }}\"" - - "HELM_VERSION=\"{{ lookup('env', 'HELM_VERSION') }}\"" - "KIND_VERSION=\"{{ lookup('env', 'KIND_VERSION') }}\"" - "KUBERNETES_VERSION=\"{{ lookup('env', 'KUBERNETES_VERSION') }}\"" - "DEVELOPER_MODE=\"{{ lookup('env', 'DEVELOPER_MODE') or 'false' }}\"" when: - suite is defined - - suite == "helm" + - suite == "kubernetes-autodiscover" tags: - scripts - name: Extend environment + no_log: true lineinfile: state: present line: "{{item}}" @@ -157,27 +161,29 @@ mode: '0777' dest: "{{ e2e_base_dir }}.ci/scripts/functional-test.sh" content: | - #!/usr/bin/env bash - set -euxo pipefail + #!/usr/bin/env bash + set -euo pipefail + + BASE_DIR="/home/{{ansible_user}}/e2e-testing" + SUITE="{{ lookup('env', 'SUITE') or 'fleet' }}" + REPORT_PREFIX="{{ lookup('env', 'REPORT_PREFIX') or 'junit' }}" - BASE_DIR="/home/{{ansible_user}}/e2e-testing" - SUITE="{{ lookup('env', 'SUITE') or 'fleet' }}" - REPORT_PREFIX="{{ lookup('env', 'REPORT_PREFIX') or 'junit' }}" + export PATH="$PATH:/usr/local/go/bin" - export PATH="$PATH:/usr/local/go/bin" + BASE_DIR=${BASE_DIR} "${BASE_DIR}/.ci/scripts/install-test-dependencies.sh" "${SUITE}" - BASE_DIR=${BASE_DIR} "${BASE_DIR}/.ci/scripts/install-test-dependencies.sh" "${SUITE}" + OUTPUT_DIR="{{ e2e_base_dir }}outputs/docker-logs" OUTPUT_FILE="docker-logs-${REPORT_PREFIX}-{{ runId }}-${SUITE}" "${BASE_DIR}/.ci/scripts/run_filebeat.sh" - REPORT_PREFIX=$(echo "$REPORT_PREFIX" | sed -r 's/[ @~]+//g') - SEED="$(date +%Y-%m-%d-%H:%M:%S)" - REPORT="{{ e2e_base_dir }}outputs/TEST-${REPORT_PREFIX}-{{ runId }}-${SEED}" - echo 
"REPORT=\"${REPORT}"\" >> {{ e2e_home_dir }}.env + REPORT_PREFIX=$(echo "$REPORT_PREFIX" | sed -r 's/[ @~]+//g') + SEED="$(date +%Y-%m-%d-%H:%M:%S)" + REPORT="{{ e2e_base_dir }}outputs/TEST-${REPORT_PREFIX}-{{ runId }}-${SEED}" + echo "REPORT=\"${REPORT}"\" >> {{ e2e_home_dir }}.env - echo "Removing previous test files in the case the workspace is reused" - rm -f {{ e2e_base_dir }}outputs/TEST-*.* + echo "Removing previous test files in the case the workspace is reused" + rm -f {{ e2e_base_dir }}outputs/TEST-*.* - TAGS="{{ lookup('env', 'TAGS') }}" \ - FORMAT="pretty,cucumber:${REPORT}.json,junit:${REPORT}.xml" \ - make --no-print-directory -C "{{ e2e_base_dir }}e2e/_suites/${SUITE}" functional-test + TAGS="{{ lookup('env', 'TAGS') }}" \ + FORMAT="pretty,cucumber:${REPORT}.json,junit:${REPORT}.xml" \ + make --no-print-directory -C "{{ e2e_base_dir }}e2e/_suites/${SUITE}" functional-test tags: - scripts diff --git a/.ci/ansible/tasks/setup_test_script_windows.yml b/.ci/ansible/tasks/setup_test_script_windows.yml index 6eed75fe4d..b505301cf0 100644 --- a/.ci/ansible/tasks/setup_test_script_windows.yml +++ b/.ci/ansible/tasks/setup_test_script_windows.yml @@ -5,6 +5,7 @@ state: absent - name: Extend environment for Fleet testing (Windows) + no_log: true community.windows.win_lineinfile: state: present line: "{{item}}" @@ -14,9 +15,10 @@ with_items: - "SUITE=\"{{ lookup('env', 'SUITE') or 'fleet' }}\"" - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'remote' }}\"" - - "ELASTICSEARCH_PASSWORD=\"changeme\"" - - "KIBANA_PASSWORD=\"changeme\"" - - "SKIP_PULL=\"1\"" + - "ELASTICSEARCH_PASSWORD=\"{{ lookup('env', 'ELASTICSEARCH_PASSWORD') or 'changeme' }}\"" + - "KIBANA_PASSWORD=\"{{ lookup('env', 'KIBANA_PASSWORD') or 'changeme' }}\"" + - "SKIP_PULL=\"{{ lookup('env', 'SKIP_PULL') or '1' }}\"" + - "DEVELOPER_MODE=\"{{ lookup('env', 'DEVELOPER_MODE') or 'false' }}\"" when: - suite is defined - stackRunner is defined @@ -25,6 +27,7 @@ - scripts - name: Extend environment for Remote provider (Windows) + no_log: true community.windows.win_lineinfile: state: present line: "{{ item }}" @@ -32,9 +35,9 @@ dest: "{{ e2e_home_dir }}.env" create: yes with_items: - - "ELASTICSEARCH_URL=\"http://{{ stackRunner }}:9200\"" - - "KIBANA_URL=\"http://{{ stackRunner }}:5601\"" - - "FLEET_URL=\"http://{{ stackRunner }}:8220\"" + - "ELASTICSEARCH_URL=\"{{ lookup('env', 'ELASTICSEARCH_URL') or 'http://{{ stackRunner }}:9200' }}\"" + - "KIBANA_URL=\"{{ lookup('env', 'KIBANA_URL') or 'http://{{ stackRunner }}:5601' }}\"" + - "FLEET_URL=\"{{ lookup('env', 'FLEET_URL') or 'http://{{ stackRunner }}:8220' }}\"" when: - suite is defined - stackRunner is defined @@ -43,6 +46,7 @@ - scripts - name: Extend environment for Fleet with elastic-package testing (Windows) + no_log: true community.windows.win_lineinfile: state: present line: "{{item}}" @@ -57,6 +61,7 @@ - scripts - name: Extend environment for Kubernetes Autodiscover testing (Windows) + no_log: true community.windows.win_lineinfile: state: present line: "{{item}}" @@ -74,27 +79,8 @@ tags: - scripts -- name: Extend environment for Helm testing (Windows) - community.windows.win_lineinfile: - state: present - line: "{{item}}" - insertafter: EOF - dest: "{{ e2e_home_dir }}.env" - create: yes - with_items: - - "SUITE=\"{{ lookup('env', 'SUITE') or 'helm' }}\"" - - "PROVIDER=\"{{ lookup('env', 'PROVIDER') or 'docker' }}\"" - - "HELM_CHART_VERSION=\"{{ lookup('env', 'HELM_CHART_VERSION') }}\"" - - "HELM_VERSION=\"{{ lookup('env', 'HELM_VERSION') }}\"" - - "KIND_VERSION=\"{{ 
lookup('env', 'KIND_VERSION') }}\"" - - "KUBERNETES_VERSION=\"{{ lookup('env', 'KUBERNETES_VERSION') }}\"" - when: - - suite is defined - - suite == "helm" - tags: - - scripts - - name: Extend environment (Windows) + no_log: true community.windows.win_lineinfile: state: present line: "{{item}}" diff --git a/.ci/ansible/teardown.yml b/.ci/ansible/teardown.yml index ac2a829d53..5fe6a36026 100644 --- a/.ci/ansible/teardown.yml +++ b/.ci/ansible/teardown.yml @@ -8,31 +8,21 @@ amazon.aws.ec2_instance_info: region: us-east-2 filters: - "tag:name": e2e-{{ instanceID }} + "tag:name": "e2e-{{ instanceID }}" register: ec2_node_info - name: Print ec2 info ansible.builtin.debug: - var: ec2_node_info + var: ec2_node_info.instances[0].tags.name - name: "Destroy environment" - ec2: - key_name: "e2essh-{{ runId }}" + amazon.aws.ec2_instance: + state: terminated + filters: + "tag:name": "e2e-{{ instanceID }}" region: us-east-2 - group: e2e - image: '{{ nodeImage }}' - instance_tags: - name: "e2e-{{ instanceID }}" - exact_count: 0 - count_tag: - name: "e2e-{{ instanceID }}" - async: 45 - poll: 0 - - name: "Delete AWS keypair" ec2_key: region: us-east-2 name: "e2essh-{{ runId }}" state: absent - async: 45 - poll: 0 diff --git a/.ci/e2eTestingHelmDaily.groovy b/.ci/e2eTestingHelmDaily.groovy deleted file mode 100644 index 45d46fea8f..0000000000 --- a/.ci/e2eTestingHelmDaily.groovy +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -@Library('apm@current') _ - -pipeline { - agent none - environment { - JOB_GCS_BUCKET = credentials('gcs-bucket') - NOTIFY_TO = credentials('notify-to') - PIPELINE_LOG_LEVEL='INFO' - } - options { - timeout(time: 120, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '20', artifactNumToKeepStr: '20')) - timestamps() - ansiColor('xterm') - disableResume() - durabilityHint('PERFORMANCE_OPTIMIZED') - rateLimitBuilds(throttle: [count: 60, durationName: 'hour', userBoost: true]) - quietPeriod(10) - } - stages { - stage('Run Tests') { - steps { - runE2E(jobName: "${env.JOB_BASE_NAME}", - runTestsSuites: 'helm', - slackChannel: 'integrations', - propagate: true, - wait: true) - } - } - } - post { - cleanup { - notifyBuildResult() - } - } -} diff --git a/.ci/e2eTestingMacosDaily.groovy b/.ci/e2eTestingMacosDaily.groovy index 201b0d02e1..b69bae6b25 100644 --- a/.ci/e2eTestingMacosDaily.groovy +++ b/.ci/e2eTestingMacosDaily.groovy @@ -34,9 +34,9 @@ pipeline { quietPeriod(10) } parameters { - string(name: 'ELASTIC_AGENT_VERSION', defaultValue: '8.6.0-SNAPSHOT', description: 'SemVer version of the Elastic Agent to be used for the tests. 
You can use here the tag of your PR to test your changes') - string(name: 'ELASTIC_STACK_VERSION', defaultValue: '8.6.0-SNAPSHOT', description: 'SemVer version of the stack to be used for the tests.') - string(name: 'BEAT_VERSION', defaultValue: '8.6.0-SNAPSHOT', description: 'SemVer version of the Beat to be used for the tests. You can use here the tag of your PR to test your changes') + string(name: 'ELASTIC_AGENT_VERSION', defaultValue: '8.7.0-SNAPSHOT', description: 'SemVer version of the Elastic Agent to be used for the tests. You can use here the tag of your PR to test your changes') + string(name: 'ELASTIC_STACK_VERSION', defaultValue: '8.7.0-SNAPSHOT', description: 'SemVer version of the stack to be used for the tests.') + string(name: 'BEAT_VERSION', defaultValue: '8.7.0-SNAPSHOT', description: 'SemVer version of the Beat to be used for the tests. You can use here the tag of your PR to test your changes') string(name: 'GITHUB_CHECK_REPO', defaultValue: '', description: 'Name of the GitHub repo to be updated. Only modified if this build is triggered from another parent stream (i.e. Beats).') string(name: 'GITHUB_CHECK_SHA1', defaultValue: '', description: 'Git SHA for the Beats upstream project (branch or PR)') choice(name: 'LOG_LEVEL', choices: ['TRACE', 'DEBUG', 'INFO'], description: 'Log level to be used') diff --git a/.ci/jobs/e2e-testing-helm-daily-mbp.yml b/.ci/jobs/e2e-testing-mbp-tmp.yml similarity index 65% rename from .ci/jobs/e2e-testing-helm-daily-mbp.yml rename to .ci/jobs/e2e-testing-mbp-tmp.yml index 6bf5e85a34..da2ad50cc4 100644 --- a/.ci/jobs/e2e-testing-helm-daily-mbp.yml +++ b/.ci/jobs/e2e-testing-mbp-tmp.yml @@ -1,30 +1,34 @@ --- - job: - name: e2e-tests/e2e-testing-helm-daily-mbp - display-name: End-2-End tests for Observability Helm charts Pipeline - description: Run E2E Helm Charts test suite daily, including maintenance branches + name: e2e-tests/e2e-testing-mbp-tmp + display-name: Temporary End-2-End Tests Pipeline + description: Temporary Jenkins pipeline for the e2e-testing project view: E2E project-type: multibranch - script-path: .ci/e2eTestingHelmDaily.groovy + logrotate: + daysToKeep: 30 + numToKeep: 100 + number-to-keep: 100 + days-to-keep: 30 + script-path: .ci/Jenkinsfile-tmp scm: - github: branch-discovery: no-pr - head-filter-regex: '(main|8\.\d|7\.17|feature-.*)' + head-filter-regex: '(PR-3169)' discover-pr-forks-strategy: merge-current discover-pr-forks-trust: permission discover-pr-origin: merge-current - discover-tags: false + discover-tags: true notification-context: 'beats-ci/e2e-testing' - disable-pr-notifications: true repo: e2e-testing repo-owner: elastic credentials-id: 2a9602aa-ab9f-4e52-baf3-b71ca88469c7-UserAndToken ssh-checkout: credentials: f6c7695a-671e-4f4f-a331-acdce44ff9ba - property-strategies: - all-branches: - - suppress-scm-triggering: true build-strategies: + - tags: + ignore-tags-older-than: -1 + ignore-tags-newer-than: -1 - regular-branches: true - change-request: ignore-target-only-changes: true @@ -33,7 +37,7 @@ before: true prune: true shallow-clone: true - depth: 3 + depth: 4 do-not-fetch-tags: true submodule: disable: false @@ -43,5 +47,3 @@ timeout: '15' use-author: true wipe-workspace: 'True' - triggers: - - timed: 'H H(4-5) * * 1-5' diff --git a/.ci/schedule-daily.groovy b/.ci/schedule-daily.groovy index 860aa7e23d..05ac37455a 100644 --- a/.ci/schedule-daily.groovy +++ b/.ci/schedule-daily.groovy @@ -38,8 +38,7 @@ def runBuilds(Map args = [:]) { def quietPeriod = 0 branches.each { branch -> if 
(isBranchUnifiedReleaseAvailable(branch)) {
- build(quietPeriod: quietPeriod, job: "e2e-tests/e2e-testing-fleet-daily-mbp/${branch}", wait: false, propagate: false)
- build(quietPeriod: quietPeriod, job: "e2e-tests/e2e-testing-helm-daily-mbp/${branch}", wait: false, propagate: false)
+ build(quietPeriod: quietPeriod, job: "e2e-tests/e2e-testing-fleet-daily-mbp/${branch}", wait: false, propagate: false)
 build(quietPeriod: quietPeriod, job: "e2e-tests/e2e-testing-k8s-autodiscovery-daily-mbp/${branch}", wait: false, propagate: false)
 // Increase the quiet period for the next iteration
 quietPeriod += args.quietPeriodFactor
diff --git a/.ci/scripts/functional-test.sh b/.ci/scripts/functional-test.sh
index 63c9673189..08c8fa7c00 100755
--- a/.ci/scripts/functional-test.sh
+++ b/.ci/scripts/functional-test.sh
@@ -16,6 +16,7 @@ set -euxo pipefail
 # - BEAT_VERSION - that's the version of the Beat to be tested. Default is stored in '.stack-version'.
 # - ELASTIC_AGENT_VERSION - that's the version of the Elastic Agent to be tested. Default is stored in '.stack-version'.
 #
+# NOTE: this script is replaced at runtime by .ci/ansible/tasks/setup_test_script.yml

 BASE_VERSION="$(cat $(pwd)/.stack-version)"
@@ -33,6 +34,8 @@ REPORT_PREFIX=${REPORT_PREFIX:-"${SUITE}_${GOARCH}_${TAGS}"}
 rm -rf outputs || true
 mkdir -p outputs
+
+OUTPUT_DIR=$(pwd)/outputs/tests-logs .ci/scripts/run_filebeat.sh
+
 REPORT_PREFIX=$(echo "$REPORT_PREFIX" | sed -r 's/[ @~]+//g')
 REPORT="$(pwd)/outputs/TEST-${REPORT_PREFIX}"
diff --git a/.ci/scripts/gen-platform-env-file.py b/.ci/scripts/gen-platform-env-file.py
new file mode 100755
index 0000000000..06363f3aca
--- /dev/null
+++ b/.ci/scripts/gen-platform-env-file.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+"""this script is used to parse the yaml file and generate the .env file."""
+
+import sys
+import yaml
+
+# store first python argument in a variable
+platform_selected = sys.argv[1]
+file_env = f'.env-{platform_selected}'
+env_prefix = 'NODE'
+
+if platform_selected == 'stack':
+    env_prefix = 'STACK'
+
+PLATFORMS_FILE = '.e2e-platforms.yaml'
+FILE_ENCODING = 'UTF-8'
+
+with open(PLATFORMS_FILE, 'r', encoding=FILE_ENCODING) as stream:
+    try:
+        values = yaml.safe_load(stream)
+        platforms = values['PLATFORMS']
+        platform = platforms.get(platform_selected)
+        if platform is None:
+            print(f'Platform "{platform_selected}" not found')
+            sys.exit(1)
+        shell_type = platform.get('shell_type')
+        if shell_type is None:
+            shell_type = 'sh'
+        image = platform.get('image')
+        instance_type = platform.get('instance_type')
+        user = platform.get('username')
+        with open(file_env, 'w', encoding=FILE_ENCODING) as f:
+            f.write(f"export {env_prefix}_IMAGE={image}\n")
+            f.write(f"export {env_prefix}_INSTANCE_TYPE={instance_type}\n")
+            f.write(f"export {env_prefix}_LABEL={platform_selected}\n")
+            f.write(f"export {env_prefix}_SHELL_TYPE={shell_type}\n")
+            f.write(f"export {env_prefix}_USER={user}\n")
+    except yaml.YAMLError as exc:
+        print("Error parsing YAML file: ", exc)
+        sys.exit(1)
diff --git a/.ci/scripts/run_filebeat.sh b/.ci/scripts/run_filebeat.sh
new file mode 100755
index 0000000000..543dcf4ab5
--- /dev/null
+++ b/.ci/scripts/run_filebeat.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+set -eu
+OUTPUT_DIR=${OUTPUT_DIR:-'/tmp/filebeat'}
+OUTPUT_FILE=${OUTPUT_FILE:-'docker'}
+CONFIG_PATH=${CONFIG_PATH:-'/tmp/filebeat.yml'}
+DOCKER_IMAGE=${DOCKER_IMAGE:-'docker.elastic.co/beats/filebeat:8.5.3'}
+
+echo "OUTPUT_DIR=${OUTPUT_DIR}"
+echo "OUTPUT_FILE=${OUTPUT_FILE}"
+echo "CONFIG_PATH=${CONFIG_PATH}"
+echo "DOCKER_IMAGE=${DOCKER_IMAGE}"
+
+for c in $(docker ps --filter label="name=filebeat" -q)
+do
+  docker kill "${c}"
+done
+
+mkdir -p "${OUTPUT_DIR}"
+
+cat <<EOF > "${CONFIG_PATH}"
+---
+filebeat.autodiscover:
+  providers:
+    - type: docker
+      condition:
+        not:
+          contains:
+            docker.container.image: "${DOCKER_IMAGE}"
+      templates:
+        - config:
+            - type: container
+              paths:
+                - /var/lib/docker/containers/\${data.docker.container.id}/*.log
+processors:
+  - add_host_metadata: ~
+  - add_cloud_metadata: ~
+  - add_docker_metadata: ~
+  - add_kubernetes_metadata: ~
+
+output.file:
+  path: "/output"
+  filename: ${OUTPUT_FILE}
+  permissions: 0644
+  codec.format:
+    string: '{"image": "%{[container.image.name]}", "message": %{[message]}}'
+EOF
+
+echo "INFO: Run filebeat"
+docker run \
+  --detach \
+  -v "${OUTPUT_DIR}:/output" \
+  -v "${CONFIG_PATH}:/usr/share/filebeat/filebeat.yml" \
+  -u 0:0 \
+  -v /var/lib/docker/containers:/var/lib/docker/containers \
+  -v /var/run/docker.sock:/var/run/docker.sock \
+  -e OUTPUT_FILE="${OUTPUT_FILE}" \
+  -p 5066:5066 \
+  "${DOCKER_IMAGE}" \
+  --strict.perms=false \
+  -environment container \
+  -E http.enabled=true > filebeat_docker_id
+
+ID=$(docker ps --filter label="name=filebeat" -q)
+URL=${2:-"http://localhost:5066/stats?pretty"}
+
+echo "INFO: print existing docker context"
+docker ps -a || true
+
+sleep 10
+
+echo "INFO: wait for the docker container to be available"
+N=0
+until docker exec "${ID}" curl -sSfI --retry 10 --retry-delay 5 --max-time 5 "${URL}"
+do
+  sleep 5
+  if [ "${N}" -gt 6 ]; then
+    echo "ERROR: print docker inspect"
+    docker inspect "${ID}"
+    echo "ERROR: docker container is not available"
+    docker logs "${ID}"
+    break;
+  fi
+  N=$((N + 1))
+done
diff --git a/.ci/scripts/yq.sh b/.ci/scripts/yq.sh
deleted file mode 100755
index e77677af15..0000000000
--- a/.ci/scripts/yq.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-
-## Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-## or more contributor license agreements. Licensed under the Elastic License;
-## you may not use this file except in compliance with the Elastic License.
-
-set -euo pipefail
-
-BASEDIR=$(dirname "$0")
-YQ_IMAGE="mikefarah/yq:4"
-
-PLATFORM="${1:-}"
-FILE_ENV=".env-${PLATFORM}"
-ENV_PREFIX="NODE"
-
-if [ "${PLATFORM}" == "stack" ]; then
- ENV_PREFIX="STACK"
-fi
-
-docker run --rm -w "/workdir" -v "${PWD}":/workdir ${YQ_IMAGE} ".PLATFORMS | with_entries( select( .key | (.
== \"${PLATFORM}\") ) )" .e2e-platforms.yaml > ${PWD}/yml.tmp - -SHELL_TYPE=$(docker run --rm -i -w "/workdir" -v "${PWD}":/workdir ${YQ_IMAGE} ".${PLATFORM}.shell_type" < yml.tmp) -if [ "${SHELL_TYPE}" == "null" ]; then - SHELL_TYPE="sh" -fi - -echo "export ${ENV_PREFIX}_IMAGE="$(docker run --rm -i -w "/workdir" -v "${PWD}":/workdir ${YQ_IMAGE} ".${PLATFORM}.image" < yml.tmp) > "${FILE_ENV}" -echo "export ${ENV_PREFIX}_INSTANCE_TYPE="$(docker run --rm -i -w "/workdir" -v "${PWD}":/workdir ${YQ_IMAGE} ".${PLATFORM}.instance_type" < yml.tmp) >> "${FILE_ENV}" -echo "export ${ENV_PREFIX}_LABEL=${PLATFORM}" >> "${FILE_ENV}" -echo "export ${ENV_PREFIX}_SHELL_TYPE="${SHELL_TYPE} >> "${FILE_ENV}" -echo "export ${ENV_PREFIX}_USER="$(docker run --rm -i -w "/workdir" -v "${PWD}":/workdir ${YQ_IMAGE} ".${PLATFORM}.username" < yml.tmp) >> "${FILE_ENV}" - -rm -f ${PWD}/yml.tmp diff --git a/.github/paths-labeller.yml b/.github/paths-labeller.yml index b43521ebfd..73c6825501 100644 --- a/.github/paths-labeller.yml +++ b/.github/paths-labeller.yml @@ -7,8 +7,6 @@ - ".pre-commit-config.yaml" - "cli": - "cli/**/*.*" - - "helm": - - "e2e/_suites/helm/**/*.*" - "Team: Agent": - "e2e/_suites/fleet/**/*.*" - "licenses": diff --git a/.stack-version b/.stack-version index 4d0b5c9c4f..24d4566378 100644 --- a/.stack-version +++ b/.stack-version @@ -1 +1 @@ -8.6.0-55d181cf-SNAPSHOT +8.6.0-2d1af9b1-SNAPSHOT diff --git a/README.md b/README.md index 1143194c6a..344f73d4cb 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,6 @@ This repository contains: 1. A [CI Infrastructure](./.ci/README.md) to provision VMs where the tests will be executed at CI time. 2. A [Go library](./cli/README.md) to provision services in the way of Docker containers. It will provide the services using Docker Compose files. 3. A [test framework](./e2e/README.md) to execute e2e tests for certain Observability projects: - - [Observability Helm charts](./e2e/_suites/helm): - - APM Server - - Filebeat - - Metricbeat - [Kubernetes Autodiscover](./e2e/_suites/kubernetes-autodiscover) - [Fleet](./e2e/_suites/fleet) - Stand-Alone mode diff --git a/e2e/TROUBLESHOOTING.md b/e2e/TROUBLESHOOTING.md index 6185637d2a..cd063f7997 100644 --- a/e2e/TROUBLESHOOTING.md +++ b/e2e/TROUBLESHOOTING.md @@ -128,4 +128,3 @@ To change it, please use Docker UI, go to `Preferences > Resources > File Sharin - Go to '.venv/bin' folder and activate the python virtual environment. - Specify the ansible version in requirement.txt file under `.ci/ansible` folder. - Rerun requirements.txt file manually using `pip install -r requirements.txt' command. - diff --git a/e2e/_suites/fleet/features/upgrade_agent.feature b/e2e/_suites/fleet/features/upgrade_agent.feature index 3f2f33dac2..f08d83bee9 100644 --- a/e2e/_suites/fleet/features/upgrade_agent.feature +++ b/e2e/_suites/fleet/features/upgrade_agent.feature @@ -12,9 +12,9 @@ Scenario Outline: Upgrading an installed agent from Examples: Stale versions | stale-version | | latest | -| 8.4-SNAPSHOT | +| 8.4.0 | | 8.3.0 | | 8.2.0 | | 8.1.3 | | 8.1.0 | -| 7.17-SNAPSHOT | +| 7.17.8 | diff --git a/e2e/_suites/fleet/fleet_test.go b/e2e/_suites/fleet/fleet_test.go index 5501cbfd58..544e7ab93c 100644 --- a/e2e/_suites/fleet/fleet_test.go +++ b/e2e/_suites/fleet/fleet_test.go @@ -65,6 +65,20 @@ func afterScenario(fts *FleetTestSuite) { // exposed as container logs. 
For that reason we need to go through the installer abstraction agentInstaller, _ := installer.Attach(fts.currentContext, fts.getDeployer(), agentService, fts.InstallerType) + logsPath, _ := filepath.Abs(filepath.Join("..", "..", "..", "outputs", serviceName+uuid.New().String()+".tgz")) + _, err := shell.Execute(fts.currentContext, ".", "tar", "czf", logsPath, "--exclude", "*/components/*", "--exclude", "*/tmp/*", "--exclude", "*/downloads/*", "--exclude", "*/install/*", "--exclude", "/opt/Elastic/Agent/data/elastic-agent-*/elastic-agent", "/opt/Elastic/Agent/data") + if err != nil { + log.WithFields(log.Fields{ + "serviceName": serviceName, + "path": logsPath, + }).Warn("Failed to collect logs") + } else { + log.WithFields(log.Fields{ + "serviceName": serviceName, + "path": logsPath, + }).Info("Logs collected") + } + if log.IsLevelEnabled(log.DebugLevel) { err := agentInstaller.Logs(fts.currentContext) if err != nil { @@ -245,6 +259,14 @@ func bootstrapFleet(ctx context.Context, env map[string]string) error { }).Fatal("Elasticsearch Cluster is not healthy") } + _, err = kibanaClient.WaitForReady(ctx, 10*time.Minute) + if err != nil { + log.WithFields(log.Fields{ + "error": err, + "env": env, + }).Fatal("Kibana is not healthy") + } + err = kibanaClient.RecreateFleet(ctx) if err != nil { log.WithFields(log.Fields{ diff --git a/e2e/_suites/helm/Makefile b/e2e/_suites/helm/Makefile deleted file mode 100644 index c56d173398..0000000000 --- a/e2e/_suites/helm/Makefile +++ /dev/null @@ -1 +0,0 @@ -include ../../commons-test.mk diff --git a/e2e/_suites/helm/README.md b/e2e/_suites/helm/README.md deleted file mode 100644 index 587825ed39..0000000000 --- a/e2e/_suites/helm/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Observability Helm charts End-To-End tests - -## Motivation - -Our goal is for the Observability team to execute this automated e2e test suite while developing the Helm charts for APM Server, Filebeat and Metricbeat. The tests in this folder assert that the use cases (or scenarios) defined in the `features` directory are behaving as expected. - -## How do the tests work? - -At the topmost level, the test framework uses a BDD framework written in Go, where we set -the expected behavior of use cases in a feature file using Gherkin, and implementing the steps in Go code. -The provisioning of services is accomplished using [Kind (Kubernetes in Docker)](https://kind.sigs.k8s.io/https://kind.sigs.k8s.io/) and [Helm](https://helm.sh/) packages. - -The tests will follow this general high-level approach: - -1. Install runtime dependencies creating a Kind cluster using the locally installed `kind` binary, happening at before the test suite runs. -1. Execute BDD steps representing each scenario. Each step will return an Error if the behavior is not satisfied, marking the step and the scenario as failed, or will return `nil`. - -### Running the tests - -1. Clone this repository, say into a folder named `e2e-testing`. - - ``` shell - git clone git@github.com:elastic/e2e-testing.git - ``` - -2. Configure the version of the tools you want to test (Optional). - -This is an example of the optional configuration: - - ```shell - # Depending on the versions used, - export HELM_VERSION="3.9.0" # Helm version: for Helm v2.x.x we have to initialise Tiller right after the k8s cluster - export HELM_CHART_VERSION="7.17.3" # version of the Elastic's Observability Helm charts - export KUBERNETES_VERSION="1.25.0" # version of the cluster to be passed to kind - ``` - -3. Install dependencies. 
- - - Install Helm 3.9.0 - - Install Kind 0.14.0 - - Install Go, using the language version defined in the `.go-version` file at the root directory. We recommend using [GVM](https://github.com/andrewkroh/gvm), same as done in the CI, which will allow you to install multiple versions of Go, setting the Go environment in consequence: `eval "$(gvm 1.15.9)"` - - Godog and other test-related binaries will be installed in their supported versions when the project is first built, thanks to Go modules and Go build system. - -4. Run the tests. - - If you want to run the tests in Developer mode, which means reusing bakend services between test runs, please set this environment variable first: - - ```shell - # It won't tear down the backend services (k8s cluster) after a test suite. - export DEVELOPER_MODE=true - ``` - - ```shell - cd e2e/_suites/helm - OP_LOG_LEVEL=DEBUG go test -v - ``` - - Optionally, you can run only one of the feature files - ```shell - cd e2e/_suites/helm - OP_LOG_LEVEL=DEBUG go test -timeout 90m -v --godog.tags='@apm-server' - ``` - -## Diagnosing test failures - -### Setup failures - -Sometimes the tests could fail to configure or start the kubernetes cluster, etc. To determine why -this happened, look at your terminal log in DEBUG/TRACE mode. make sure there is not another test cluster: - -```shell -# Will remove existing test cluster -kind delete cluster --name helm-charts-test-suite -``` - -Note what you find and file a bug in the `elastic/e2e-testing` repository, requiring a fix to the helm suite to properly configure and start the product. - -### I cannot move on - -Please open an issue here: https://github.com/elastic/e2e-testing/issues/new diff --git a/e2e/_suites/helm/features/apm_server.feature b/e2e/_suites/helm/features/apm_server.feature deleted file mode 100644 index e02060a54d..0000000000 --- a/e2e/_suites/helm/features/apm_server.feature +++ /dev/null @@ -1,16 +0,0 @@ -@apm-server -Feature: APM Server - The Helm chart is following product recommended configuration for Kubernetes - -Scenario: The APM Server chart will create recommended K8S resources - Given a cluster is running - When the "apm-server" Elastic's helm chart is installed - Then a "Deployment" will manage the pods - And a "Service" will expose the pods as network services internal to the k8s cluster - And a "ConfigMap" resource contains the "apm-server.yml" key - And a "ServiceAccount" resource manages RBAC - And a "ClusterRole" resource manages RBAC - And a "ClusterRoleBinding" resource manages RBAC - And resource "limits" are applied - And resource "requests" are applied - And the "RollingUpdate" strategy can be used for "Deployment" during updates diff --git a/e2e/_suites/helm/features/filebeat.feature b/e2e/_suites/helm/features/filebeat.feature deleted file mode 100644 index dde4f573e7..0000000000 --- a/e2e/_suites/helm/features/filebeat.feature +++ /dev/null @@ -1,17 +0,0 @@ -@filebeat -Feature: Filebeat - The Helm chart is following product recommended configuration for Kubernetes - -Scenario: The Filebeat chart will create recommended K8S resources - Given a cluster is running - When the "filebeat" Elastic's helm chart is installed - Then a pod will be deployed on each node of the cluster by a DaemonSet - And a "ConfigMap" resource contains the "filebeat.yml" key - And a "ServiceAccount" resource manages RBAC - And a "ClusterRole" resource manages RBAC - And a "ClusterRoleBinding" resource manages RBAC - And resource "limits" are applied - And resource "requests" are applied - And the 
"RollingUpdate" strategy can be used during updates - And the "filebeat-config" volume is mounted at "/usr/share/filebeat/filebeat.yml" with subpath "filebeat.yml" - And the "data" volume is mounted at "/usr/share/filebeat/data" with no subpath diff --git a/e2e/_suites/helm/features/metricbeat.feature b/e2e/_suites/helm/features/metricbeat.feature deleted file mode 100644 index 058cd30605..0000000000 --- a/e2e/_suites/helm/features/metricbeat.feature +++ /dev/null @@ -1,23 +0,0 @@ -@metricbeat -Feature: Metricbeat - The Helm chart is following product recommended configuration for Kubernetes - -Scenario: The Metricbeat chart will create recommended K8S resources - Given a cluster is running - When the "metricbeat" Elastic's helm chart is installed - Then a pod will be deployed on each node of the cluster by a DaemonSet - And a "Deployment" will manage additional pods for metricsets querying internal services - And a "kube-state-metrics" chart will retrieve specific Kubernetes metrics - And a "ConfigMap" resource contains the "metricbeat.yml" key - And a "ConfigMap" resource contains the "kube-state-metrics-metricbeat.yml" key - And a "ServiceAccount" resource manages RBAC - And a "ClusterRole" resource manages RBAC - And a "ClusterRoleBinding" resource manages RBAC - And resource "limits" are applied - And resource "requests" are applied - And the "RollingUpdate" strategy can be used for "Deployment" during updates - And the "RollingUpdate" strategy can be used for "Daemonset" during updates - And the "data" volume is mounted at "/usr/share/metricbeat/data" with no subpath - And the "varrundockersock" volume is mounted at "/var/run/docker.sock" with no subpath - And the "proc" volume is mounted at "/hostfs/proc" with no subpath - And the "cgroup" volume is mounted at "/hostfs/sys/fs/cgroup" with no subpath diff --git a/e2e/_suites/helm/helm_charts_test.go b/e2e/_suites/helm/helm_charts_test.go deleted file mode 100644 index 97fd06c165..0000000000 --- a/e2e/_suites/helm/helm_charts_test.go +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package main - -import ( - "context" - "fmt" - "os" - "strings" - "testing" - "time" - - "github.com/Jeffail/gabs/v2" - "github.com/cenkalti/backoff/v4" - "github.com/elastic/e2e-testing/internal/common" - "github.com/elastic/e2e-testing/internal/config" - "github.com/elastic/e2e-testing/internal/helm" - "github.com/elastic/e2e-testing/internal/kubectl" - "github.com/elastic/e2e-testing/internal/shell" - "github.com/elastic/e2e-testing/internal/utils" - "go.elastic.co/apm" - - "github.com/cucumber/godog" - "github.com/cucumber/godog/colors" - apme2e "github.com/elastic/e2e-testing/internal" - log "github.com/sirupsen/logrus" - flag "github.com/spf13/pflag" -) - -var helmManager helm.Manager - -//nolint:unused -var kubectlClient kubectl.Kubectl - -// helmVersion represents the default version used for Helm -var helmVersion = "3.x" - -// helmChartVersion represents the default version used for the Elastic Helm charts -var helmChartVersion = "7.17.3" - -// kubernetesVersion represents the default version used for Kubernetes -var kubernetesVersion = "1.25.0" - -var testSuite HelmChartTestSuite - -var tx *apm.Transaction -var stepSpan *apm.Span - -func setupSuite() { - config.Init() - - helmVersion = shell.GetEnv("HELM_VERSION", helmVersion) - helmChartVersion = shell.GetEnv("HELM_CHART_VERSION", helmChartVersion) - kubernetesVersion = shell.GetEnv("KUBERNETES_VERSION", kubernetesVersion) - - common.InitVersions() - - h, err := helm.Factory(helmVersion) - if err != nil { - log.Fatalf("Helm could not be initialised: %v", err) - } - - helmManager = h - - testSuite = HelmChartTestSuite{ - ClusterName: "helm-charts-test-suite", - KubernetesVersion: kubernetesVersion, - Version: helmChartVersion, - } -} - -// HelmChartTestSuite represents a test suite for a helm chart -// -//nolint:unused -type HelmChartTestSuite struct { - ClusterName string // the name of the cluster - KubernetesVersion string // the Kubernetes version for the test - Name string // the name of the chart - Version string // the helm chart version for the test - // instrumentation - currentContext context.Context -} - -func (ts *HelmChartTestSuite) aClusterIsRunning() error { - args := []string{"get", "clusters"} - - output, err := shell.Execute(context.Background(), ".", "kind", args...) 
- if err != nil { - log.WithField("error", err).Error("Could not check the status of the cluster.") - } - if output != ts.ClusterName { - return fmt.Errorf("the cluster is not running") - } - - log.WithFields(log.Fields{ - "output": output, - }).Debug("Cluster is running") - return nil -} - -func (ts *HelmChartTestSuite) addElasticRepo(ctx context.Context) error { - err := helmManager.AddRepo(ctx, "elastic", "https://helm.elastic.co") - if err != nil { - log.WithField("error", err).Error("Could not add Elastic Helm repo") - } - return err -} - -func (ts *HelmChartTestSuite) aResourceContainsTheKey(resource string, key string) error { - lowerResource := strings.ToLower(resource) - escapedKey := strings.ReplaceAll(key, ".", `\.`) - - output, err := kubectlClient.Run(ts.currentContext, "get", lowerResource, ts.getResourceName(resource), "-o", `jsonpath="{.data['`+escapedKey+`']}"`) - if err != nil { - return err - } - if output == "" { - return fmt.Errorf("there is no %s for the %s chart including %s", resource, ts.Name, key) - } - - log.WithFields(log.Fields{ - "output": output, - "name": ts.Name, - }).Debug("A " + resource + " resource contains the " + key + " key") - - return nil -} - -func (ts *HelmChartTestSuite) aResourceManagesRBAC(resource string) error { - lowerResource := strings.ToLower(resource) - - output, err := kubectlClient.Run(ts.currentContext, "get", lowerResource, ts.getResourceName(resource), "-o", `jsonpath="'{.metadata.labels.chart}'"`) - if err != nil { - return err - } - if output == "" { - return fmt.Errorf("there is no %s for the %s chart", resource, ts.Name) - } - - log.WithFields(log.Fields{ - "output": output, - "name": ts.Name, - }).Debug("A " + resource + " resource manages K8S RBAC") - - return nil -} - -func (ts *HelmChartTestSuite) aResourceWillExposePods(resourceType string) error { - selector, err := kubectlClient.GetResourceSelector(ts.currentContext, "deployment", ts.Name+"-"+ts.Name) - if err != nil { - return err - } - - maxTimeout := time.Duration(utils.TimeoutFactor) * time.Minute - - exp := utils.GetExponentialBackOff(maxTimeout) - retryCount := 1 - - // select by app label - selector = "app=" + selector - - checkEndpointsFn := func() error { - output, err := kubectlClient.GetStringResourcesBySelector(ts.currentContext, "endpoints", selector) - if err != nil { - log.WithFields(log.Fields{ - "elapsedTime": exp.GetElapsedTime(), - "error": err, - "selector": selector, - "resource": "endpoints", - "retry": retryCount, - }).Warn("Could not inspect resource with kubectl") - - retryCount++ - - return err - } - - jsonParsed, err := gabs.ParseJSON([]byte(output)) - if err != nil { - log.WithFields(log.Fields{ - "elapsedTime": exp.GetElapsedTime(), - "error": err, - "output": output, - "selector": selector, - "resource": "endpoints", - "retry": retryCount, - }).Warn("Could not parse JSON") - - retryCount++ - - return err - } - - subsets := jsonParsed.Path("items.0.subsets") - if len(subsets.Children()) == 0 { - log.WithFields(log.Fields{ - "elapsedTime": exp.GetElapsedTime(), - "resource": "endpoints", - "retry": retryCount, - "selector": selector, - }).Warn("Endpoints not present yet") - - retryCount++ - - return fmt.Errorf("there are no Endpoint subsets for the %s with the selector %s", resourceType, selector) - } - - log.WithFields(log.Fields{ - "elapsedTime": exp.GetElapsedTime(), - "resource": "endpoints", - "retry": retryCount, - "selector": selector, - }).Info("Endpoints found") - - return nil - } - - err = backoff.Retry(checkEndpointsFn, exp) - if 
err != nil { - return err - } - - return nil -} - -func (ts *HelmChartTestSuite) aResourceWillManagePods(resourceType string) error { - selector, err := kubectlClient.GetResourceSelector(ts.currentContext, "deployment", ts.Name+"-"+ts.Name) - if err != nil { - return err - } - - // select by app label - selector = "app=" + selector - - resources, err := ts.checkResources(resourceType, selector, 1) - if err != nil { - return err - } - - log.WithFields(log.Fields{ - "name": ts.Name, - "resources": resources, - }).Tracef("Checking the %s pods", resourceType) - - return nil -} - -func (ts *HelmChartTestSuite) checkResources(resourceType, selector string, min int) ([]interface{}, error) { - resources, err := kubectlClient.GetResourcesBySelector(ts.currentContext, resourceType, selector) - if err != nil { - return nil, err - } - - items := resources["items"].([]interface{}) - if len(items) < min { - return nil, fmt.Errorf("there are not %d %s for resource %s/%s-%s with the selector %s", min, resourceType, resourceType, ts.Name, ts.Name, selector) - } - - log.WithFields(log.Fields{ - "name": ts.Name, - "items": items, - }).Tracef("Checking for %d %s with selector %s", min, resourceType, selector) - - return items, nil -} - -func (ts *HelmChartTestSuite) createCluster(ctx context.Context, k8sVersion string) error { - span, _ := apm.StartSpanOptions(ctx, "Creating Kind cluster", "kind.cluster.create", apm.SpanOptions{ - Parent: apm.SpanFromContext(ctx).TraceContext(), - }) - defer span.End() - - args := []string{"create", "cluster", "--name", ts.ClusterName, "--image", "kindest/node:v" + k8sVersion} - - log.Trace("Creating cluster with kind") - output, err := shell.Execute(ctx, ".", "kind", args...) - if err != nil { - log.WithField("error", err).Error("Could not create the cluster") - return err - } - log.WithFields(log.Fields{ - "cluster": ts.ClusterName, - "k8sVersion": k8sVersion, - "output": output, - }).Info("Cluster created") - - return nil -} - -func (ts *HelmChartTestSuite) deleteChart() { - err := helmManager.DeleteChart(ts.currentContext, ts.Name) - if err != nil { - log.WithFields(log.Fields{ - "chart": ts.Name, - }).Error("Could not delete chart") - } -} - -func (ts *HelmChartTestSuite) destroyCluster(ctx context.Context) error { - args := []string{"delete", "cluster", "--name", ts.ClusterName} - - log.Trace("Deleting cluster") - output, err := shell.Execute(ctx, ".", "kind", args...) 
- if err != nil { - log.WithField("error", err).Error("Could not destroy the cluster") - return err - } - log.WithFields(log.Fields{ - "output": output, - "cluster": ts.ClusterName, - }).Debug("Cluster destroyed") - return nil -} - -func (ts *HelmChartTestSuite) elasticsHelmChartIsInstalled(chart string) error { - return ts.install(ts.currentContext, chart) -} - -// getFullName returns the name plus version, in lowercase, enclosed in quotes -func (ts *HelmChartTestSuite) getFullName() string { - return strings.ToLower("'" + ts.Name + "-" + ts.Version + "'") -} - -// getKubeStateName returns the kube-state-metrics name, in lowercase, enclosed in quotes -func (ts *HelmChartTestSuite) getKubeStateMetricsName() string { - return strings.ToLower("'" + ts.Name + "-kube-state-metrics'") -} - -// getPodName returns the name used in the app selector, in lowercase -func (ts *HelmChartTestSuite) getPodName() string { - if ts.Name == "apm-server" { - return strings.ToLower(ts.Name) - } - - return strings.ToLower(ts.Name + "-" + ts.Name) -} - -// getResourceName returns the name of the service, in lowercase, based on the k8s resource -func (ts *HelmChartTestSuite) getResourceName(resource string) string { - if resource == kubectl.ResourceTypes.ClusterRole { - return strings.ToLower(ts.Name + "-" + ts.Name + "-cluster-role") - } else if resource == kubectl.ResourceTypes.ClusterRoleBinding { - return strings.ToLower(ts.Name + "-" + ts.Name + "-cluster-role-binding") - } else if resource == kubectl.ResourceTypes.ConfigMap { - if ts.Name == "filebeat" || ts.Name == "metricbeat" { - return strings.ToLower(ts.Name + "-" + ts.Name + "-daemonset-config") - } - return strings.ToLower(ts.Name + "-" + ts.Name + "-config") - } else if resource == kubectl.ResourceTypes.Daemonset { - return strings.ToLower(ts.Name + "-" + ts.Name) - } else if resource == kubectl.ResourceTypes.Deployment { - if ts.Name == "metricbeat" { - return strings.ToLower(ts.Name + "-" + ts.Name + "-metrics") - } - return strings.ToLower(ts.Name + "-" + ts.Name) - } else if resource == kubectl.ResourceTypes.ServiceAccount { - return strings.ToLower(ts.Name + "-" + ts.Name) - } - - return "" -} - -func (ts *HelmChartTestSuite) install(ctx context.Context, chart string) error { - ts.Name = chart - - elasticChart := "elastic/" + ts.Name - - flags := []string{} - if chart == "elasticsearch" { - span, _ := apm.StartSpanOptions(ctx, "Adding Rancher Local Path Provisioner", "rancher.localpathprovisioner.add", apm.SpanOptions{ - Parent: apm.SpanFromContext(ctx).TraceContext(), - }) - defer span.End() - - // Rancher Local Path Provisioner and local-path storage class for Elasticsearch volumes - _, err := kubectlClient.Run(ctx, "apply", "-f", "https://raw.githubusercontent.com/rancher/local-path-provisioner/master/deploy/local-path-storage.yaml") - if err != nil { - log.Errorf("Could not apply Rancher Local Path Provisioner: %v", err) - return err - } - log.WithFields(log.Fields{ - "chart": ts.Name, - }).Info("Rancher Local Path Provisioner and local-path storage class for Elasticsearch volumes installed") - - maxTimeout := utils.TimeoutFactor * 100 - - log.Debug("Applying workaround to use Rancher's local-path storage class for Elasticsearch volumes") - flags = []string{"--wait", fmt.Sprintf("--timeout=%ds", maxTimeout), "--values", "https://raw.githubusercontent.com/elastic/helm-charts/master/elasticsearch/examples/kubernetes-kind/values.yaml"} - } - - return helmManager.InstallChart(ctx, ts.Name, elasticChart, ts.Version, flags) -} - -func (ts 
*HelmChartTestSuite) installRuntimeDependencies(ctx context.Context, dependencies ...string) error { - for _, dependency := range dependencies { - // Install Elasticsearch - err := ts.install(ctx, dependency) - if err != nil { - log.WithFields(log.Fields{ - "dependency": dependency, - "error": err, - }).Error("Could not install runtime dependency") - return err - } - } - - return nil -} - -func (ts *HelmChartTestSuite) podsManagedByDaemonSet() error { - output, err := kubectlClient.Run(ts.currentContext, "get", "daemonset", "--namespace=default", "-l", "app="+ts.Name+"-"+ts.Name, "-o", "jsonpath='{.items[0].metadata.labels.chart}'") - if err != nil { - return err - } - if output != ts.getFullName() { - return fmt.Errorf("there is no DaemonSet for the %s chart. Expected: %s, Actual: %s", ts.Name, ts.getFullName(), output) - } - - log.WithFields(log.Fields{ - "output": output, - "name": ts.Name, - }).Debug("A pod will be deployed on each node of the cluster by a DaemonSet") - - return nil -} - -func (ts *HelmChartTestSuite) resourceConstraintsAreApplied(constraint string) error { - output, err := kubectlClient.Run(ts.currentContext, "get", "pods", "-l", "app="+ts.getPodName(), "-o", "jsonpath='{.items[0].spec.containers[0].resources."+constraint+"}'") - if err != nil { - return err - } - if output == "" { - return fmt.Errorf("resource %s constraint for the %s chart is not applied. Actual: %s", constraint, ts.getFullName(), output) - } - - log.WithFields(log.Fields{ - "constraint": constraint, - "name": ts.Name, - "output": output, - }).Debug("Resource" + constraint + " is applied") - - return nil -} - -func (ts *HelmChartTestSuite) resourceWillManageAdditionalPodsForMetricsets(resource string) error { - lowerResource := strings.ToLower(resource) - - output, err := kubectlClient.Run(ts.currentContext, "get", lowerResource, ts.getResourceName(resource), "-o", "jsonpath='{.metadata.labels.chart}'") - if err != nil { - return err - } - if output != ts.getFullName() { - return fmt.Errorf("there is no %s for the %s chart. Expected: %s, Actual: %s", resource, ts.Name, ts.getFullName(), output) - } - - log.WithFields(log.Fields{ - "output": output, - "name": ts.Name, - }).Debug("A " + resource + " will manage additional pods for metricsets querying internal service") - - return nil -} - -func (ts *HelmChartTestSuite) strategyCanBeUsedDuringUpdates(strategy string) error { - return ts.strategyCanBeUsedForResourceDuringUpdates(strategy, kubectl.ResourceTypes.Daemonset) -} - -func (ts *HelmChartTestSuite) strategyCanBeUsedForResourceDuringUpdates(strategy string, resource string) error { - lowerResource := strings.ToLower(resource) - strategyKey := "strategy" - name := ts.getResourceName(resource) - - if resource == kubectl.ResourceTypes.Daemonset { - strategyKey = "updateStrategy" - } - - output, err := kubectlClient.Run(ts.currentContext, "get", lowerResource, name, "-o", `go-template={{.spec.`+strategyKey+`.type}}`) - if err != nil { - return err - } - if output != strategy { - return fmt.Errorf("there is no %s strategy to be used for %s on updates. 
Actual: %s", strategy, resource, output) - } - - log.WithFields(log.Fields{ - "strategy": strategy, - "resource": resource, - "name": name, - }).Debug("The strategy can be used for resource during updates") - - return nil -} - -func (ts *HelmChartTestSuite) volumeMountedWithNoSubpath(name string, mountPath string) error { - return ts.volumeMountedWithSubpath(name, mountPath, "") -} - -func (ts *HelmChartTestSuite) volumeMountedWithSubpath(name string, mountPath string, subPath string) error { - - getMountValues := func(key string) ([]string, error) { - // build the arguments for capturing the volume mounts - output, err := kubectlClient.Run(ts.currentContext, "get", "pods", "-l", "app="+ts.getPodName(), "-o", `jsonpath="{.items[0].spec.containers[0].volumeMounts[*]['`+key+`']}"`) - if err != nil { - return []string{}, err - } - output = strings.Trim(output, "\"") // remove enclosing double quotes - - return strings.Split(output, " "), nil - } - - // get volumeMounts names - names, err := getMountValues("name") - if err != nil { - return err - } - - // Find returns the smallest index i at which x == a[i], - // or len(a) if there is no such index. - find := func(a []string, x string) int { - for i, n := range a { - if x == n { - return i - } - } - return len(a) - } - - index := find(names, name) - if index == len(names) { - return fmt.Errorf("the mounted volume '%s' could not be found: %v", name, names) - } - - // get mounts paths - mountPaths, err := getMountValues("mountPath") - if err != nil { - return err - } - - if mountPath != mountPaths[index] { - return fmt.Errorf("the mounted volume for '%s' is not %s. Actual: %s", name, mountPath, mountPaths[index]) - } - - if subPath != "" { - // get subpaths - subPaths, err := getMountValues("subPath") - if err != nil { - return err - } - - if subPath != subPaths[index] { - return fmt.Errorf("the subPath for '%s' is not %s. Actual: %s", name, subPath, subPaths[index]) - } - } - - log.WithFields(log.Fields{ - "name": name, - "mountPath": mountPath, - "subPath": subPath, - }).Debug("The volumePath was found") - - return nil -} - -func (ts *HelmChartTestSuite) willRetrieveSpecificMetrics(chartName string) error { - kubeStateMetrics := "kube-state-metrics" - - output, err := kubectlClient.Run(ts.currentContext, "get", "deployment", ts.Name+"-"+kubeStateMetrics, "-o", "jsonpath='{.metadata.name}'") - if err != nil { - return err - } - if output != ts.getKubeStateMetricsName() { - return fmt.Errorf("there is no %s Deployment for the %s chart. 
Expected: %s, Actual: %s", kubeStateMetrics, ts.Name, ts.getKubeStateMetricsName(), output) - } - - log.WithFields(log.Fields{ - "output": output, - "name": ts.Name, - }).Debug("A " + kubeStateMetrics + " chart will retrieve specific Kubernetes metrics") - - return nil -} - -func InitializeHelmChartScenario(ctx *godog.ScenarioContext) { - ctx.Before(func(ctx context.Context, sc *godog.Scenario) (context.Context, error) { - log.Tracef("Before Helm scenario: %s", sc.Name) - - tx = apme2e.StartTransaction(sc.Name, "test.scenario") - tx.Context.SetLabel("suite", "helm") - - return ctx, nil - }) - - ctx.After(func(ctx context.Context, sc *godog.Scenario, err error) (context.Context, error) { - if err != nil { - e := apm.DefaultTracer.NewError(err) - e.Context.SetLabel("scenario", sc.Name) - e.Context.SetLabel("gherkin_type", "scenario") - e.Send() - } - - f := func() { - tx.End() - - apm.DefaultTracer.Flush(nil) - } - defer f() - - testSuite.deleteChart() - - log.Tracef("After Helm scenario: %s", sc.Name) - return ctx, nil - }) - - ctx.StepContext().Before(func(ctx context.Context, step *godog.Step) (context.Context, error) { - log.Tracef("Before step: %s", step.Text) - stepSpan = tx.StartSpan(step.Text, "test.scenario.step", nil) - testSuite.currentContext = apm.ContextWithSpan(context.Background(), stepSpan) - - return ctx, nil - }) - ctx.StepContext().After(func(ctx context.Context, step *godog.Step, status godog.StepResultStatus, err error) (context.Context, error) { - if err != nil { - e := apm.DefaultTracer.NewError(err) - e.Context.SetLabel("step", step.Text) - e.Context.SetLabel("gherkin_type", "step") - e.Context.SetLabel("step_status", status.String()) - e.Send() - } - - if stepSpan != nil { - stepSpan.End() - } - - log.Tracef("After step (%s): %s", status.String(), step.Text) - return ctx, nil - }) - - ctx.Step(`^a cluster is running$`, testSuite.aClusterIsRunning) - ctx.Step(`^the "([^"]*)" Elastic\'s helm chart is installed$`, testSuite.elasticsHelmChartIsInstalled) - ctx.Step(`^a pod will be deployed on each node of the cluster by a DaemonSet$`, testSuite.podsManagedByDaemonSet) - ctx.Step(`^a "([^"]*)" will manage additional pods for metricsets querying internal services$`, testSuite.resourceWillManageAdditionalPodsForMetricsets) - ctx.Step(`^a "([^"]*)" chart will retrieve specific Kubernetes metrics$`, testSuite.willRetrieveSpecificMetrics) - ctx.Step(`^a "([^"]*)" resource contains the "([^"]*)" key$`, testSuite.aResourceContainsTheKey) - ctx.Step(`^a "([^"]*)" resource manages RBAC$`, testSuite.aResourceManagesRBAC) - ctx.Step(`^the "([^"]*)" volume is mounted at "([^"]*)" with subpath "([^"]*)"$`, testSuite.volumeMountedWithSubpath) - ctx.Step(`^the "([^"]*)" volume is mounted at "([^"]*)" with no subpath$`, testSuite.volumeMountedWithNoSubpath) - ctx.Step(`^the "([^"]*)" strategy can be used during updates$`, testSuite.strategyCanBeUsedDuringUpdates) - ctx.Step(`^the "([^"]*)" strategy can be used for "([^"]*)" during updates$`, testSuite.strategyCanBeUsedForResourceDuringUpdates) - ctx.Step(`^resource "([^"]*)" are applied$`, testSuite.resourceConstraintsAreApplied) - - ctx.Step(`^a "([^"]*)" will manage the pods$`, testSuite.aResourceWillManagePods) - ctx.Step(`^a "([^"]*)" will expose the pods as network services internal to the k8s cluster$`, testSuite.aResourceWillExposePods) -} - -func InitializeHelmChartTestSuite(ctx *godog.TestSuiteContext) { - ctx.BeforeSuite(func() { - setupSuite() - log.Trace("Before Suite...") - toolsAreInstalled() - - var suiteTx 
*apm.Transaction - var suiteParentSpan *apm.Span - var suiteContext = context.Background() - - // instrumentation - defer apm.DefaultTracer.Flush(nil) - suiteTx = apme2e.StartTransaction("Initialise Helm", "test.suite") - defer suiteTx.End() - suiteParentSpan = suiteTx.StartSpan("Before Helm test suite", "test.suite.before", nil) - suiteContext = apm.ContextWithSpan(suiteContext, suiteParentSpan) - defer suiteParentSpan.End() - - err := testSuite.createCluster(suiteContext, testSuite.KubernetesVersion) - if err != nil { - return - } - err = testSuite.addElasticRepo(suiteContext) - if err != nil { - return - } - err = testSuite.installRuntimeDependencies(suiteContext, "elasticsearch") - if err != nil { - return - } - }) - - ctx.AfterSuite(func() { - f := func() { - apm.DefaultTracer.Flush(nil) - } - defer f() - - // instrumentation - var suiteTx *apm.Transaction - var suiteParentSpan *apm.Span - var suiteContext = context.Background() - defer apm.DefaultTracer.Flush(nil) - suiteTx = apme2e.StartTransaction("Tear Down Helm", "test.suite") - defer suiteTx.End() - suiteParentSpan = suiteTx.StartSpan("After Helm test suite", "test.suite.after", nil) - suiteContext = apm.ContextWithSpan(suiteContext, suiteParentSpan) - defer suiteParentSpan.End() - - if !common.DeveloperMode { - log.Trace("After Suite...") - err := testSuite.destroyCluster(suiteContext) - if err != nil { - return - } - } - }) - -} - -//nolint:unused -func toolsAreInstalled() { - binaries := []string{ - "kind", - "kubectl", - "helm", - } - - shell.CheckInstalledSoftware(binaries...) -} - -var opts = godog.Options{ - Output: colors.Colored(os.Stdout), - Format: "progress", // can define default values -} - -func init() { - godog.BindCommandLineFlags("godog.", &opts) // godog v0.11.0 (latest) -} - -func TestMain(m *testing.M) { - flag.Parse() - opts.Paths = flag.Args() - - status := godog.TestSuite{ - Name: "helm", - TestSuiteInitializer: InitializeHelmChartTestSuite, - ScenarioInitializer: InitializeHelmChartScenario, - Options: &opts, - }.Run() - - // Optional: Run `testing` package's logic besides godog. 
- if st := m.Run(); st > status { - status = st - } - - os.Exit(status) -} diff --git a/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go b/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go index 7b9c0f8cf4..2109d109b5 100644 --- a/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go +++ b/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go @@ -357,7 +357,7 @@ func (m *podsManager) waitForEventsCondition(podName string, conditionFn func(ct containerPath := fmt.Sprintf("%s/%s:/tmp/beats-events", m.kubectl.Namespace, instances[0]) localPath := filepath.Join(tmpDir, "events") - exp := backoff.WithContext(backoff.NewConstantBackOff(1*time.Second), ctx) + exp := backoff.WithContext(backoff.NewConstantBackOff(10*time.Second), ctx) return backoff.Retry(func() error { err := m.copyEvents(ctx, containerPath, localPath) if err != nil { @@ -406,7 +406,7 @@ func (m *podsManager) getPodInstances(ctx context.Context, podName string) (inst defer span.End() app := sanitizeName(podName) - ticker := backoff.WithContext(backoff.NewConstantBackOff(1*time.Second), ctx) + ticker := backoff.WithContext(backoff.NewConstantBackOff(10*time.Second), ctx) err = backoff.Retry(func() error { output, err := m.kubectl.Run(ctx, "get", "pods", "-l", "k8s-app="+app, diff --git a/internal/common/defaults.go b/internal/common/defaults.go index 3a96a8dd6e..5ec62229a2 100644 --- a/internal/common/defaults.go +++ b/internal/common/defaults.go @@ -36,7 +36,7 @@ const FleetProfileName = "fleet" const FleetServerAgentServiceName = "fleet-server" // BeatVersionBase is the base version of the Beat to use -var BeatVersionBase = "8.6.0-55d181cf-SNAPSHOT" +var BeatVersionBase = "8.6.0-2d1af9b1-SNAPSHOT" // BeatVersion is the version of the Beat to use // It can be overriden by BEAT_VERSION env var @@ -72,7 +72,13 @@ func init() { config.Init() elasticAgentWorkingDir = filepath.Join(config.OpDir(), ElasticAgentServiceName) - io.MkdirAll(elasticAgentWorkingDir) + err := io.MkdirAll(elasticAgentWorkingDir) + if err != nil { + log.WithFields(log.Fields{ + "error": err, + "path": elasticAgentWorkingDir, + }).Fatal("Could not create working directory for Elastic Agent") + } DeveloperMode = shell.GetEnvBool("DEVELOPER_MODE") if DeveloperMode { @@ -98,8 +104,13 @@ func GetElasticAgentWorkingPath(paths ...string) string { p := filepath.Join(elements...) 
	// create dirs up to the last parent
-	io.MkdirAll(filepath.Dir(p))
-
+	err := io.MkdirAll(filepath.Dir(p))
+	if err != nil {
+		log.WithFields(log.Fields{
+			"error": err,
+			"path": filepath.Dir(p),
+		}).Fatal("Could not create working directory for Elastic Agent")
+	}
 	return p
 }
diff --git a/internal/config/compose/profiles/fleet/docker-compose.yml b/internal/config/compose/profiles/fleet/docker-compose.yml
index 6f6bb38f69..6000152a74 100644
--- a/internal/config/compose/profiles/fleet/docker-compose.yml
+++ b/internal/config/compose/profiles/fleet/docker-compose.yml
@@ -2,9 +2,9 @@ version: '2.4'
 services:
   elasticsearch:
     healthcheck:
-      test: ["CMD", "curl", "-f", "-u", "elastic:changeme", "http://127.0.0.1:9200/"]
-      retries: 300
-      interval: 1s
+      interval: 10s
+      retries: 100
+      test: ["CMD-SHELL", "curl -s 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=500ms'"]
     environment:
       - ES_JAVA_OPTS=-Xms1g -Xmx1g
       - network.host="0.0.0.0"
@@ -18,7 +18,7 @@ services:
       - xpack.security.authc.token.timeout=60m
       - ELASTIC_USERNAME=admin
       - ELASTIC_PASSWORD=changeme
-    image: "docker.elastic.co/elasticsearch/elasticsearch:${stackVersion:-8.6.0-55d181cf-SNAPSHOT}"
+    image: "docker.elastic.co/elasticsearch/elasticsearch:${stackVersion:-8.6.0-2d1af9b1-SNAPSHOT}"
     platform: ${stackPlatform:-linux/amd64}
     ports:
       - "9200:9200"
@@ -31,10 +31,10 @@ services:
       elasticsearch:
         condition: service_healthy
     healthcheck:
-      test: "curl -f http://localhost:5601/login | grep kbn-injected-metadata 2>&1 >/dev/null"
-      retries: 600
-      interval: 1s
-    image: "docker.elastic.co/${kibanaDockerNamespace:-kibana}/kibana:${kibanaVersion:-8.6.0-55d181cf-SNAPSHOT}"
+      test: ["CMD-SHELL", "curl -u admin:changeme -s http://localhost:5601/api/status | grep -q 'All services are available'"]
+      retries: 60
+      interval: 10s
+    image: "docker.elastic.co/${kibanaDockerNamespace:-kibana}/kibana:${kibanaVersion:-8.6.0-2d1af9b1-SNAPSHOT}"
     platform: ${stackPlatform:-linux/amd64}
     ports:
       - "5601:5601"
diff --git a/internal/config/compose/services/elastic-agent/cloud/docker-compose.yml b/internal/config/compose/services/elastic-agent/cloud/docker-compose.yml
index 16e6814143..dd08aca6ad 100644
--- a/internal/config/compose/services/elastic-agent/cloud/docker-compose.yml
+++ b/internal/config/compose/services/elastic-agent/cloud/docker-compose.yml
@@ -1,7 +1,7 @@
 version: '2.4'
 services:
   elastic-agent:
-    image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-55d181cf-SNAPSHOT}"
+    image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-2d1af9b1-SNAPSHOT}"
     depends_on:
       elasticsearch:
         condition: service_healthy
diff --git a/internal/config/compose/services/elastic-agent/docker-compose.yml b/internal/config/compose/services/elastic-agent/docker-compose.yml
index 0d9fae3666..c776e57ce4 100644
--- a/internal/config/compose/services/elastic-agent/docker-compose.yml
+++ b/internal/config/compose/services/elastic-agent/docker-compose.yml
@@ -1,7 +1,7 @@
 version: '2.4'
 services:
   elastic-agent:
-    image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-55d181cf-SNAPSHOT}"
+    image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-2d1af9b1-SNAPSHOT}"
     depends_on:
       elasticsearch:
         condition: service_healthy
@@ -16,6 +16,10 @@ services:
       - "FLEET_ENROLLMENT_TOKEN=${fleetEnrollmentToken:-}"
"FLEET_ENROLLMENT_TOKEN=${fleetEnrollmentToken:-}" - "FLEET_INSECURE=${fleetInsecure:-0}" - "FLEET_URL=${fleetUrl:-}" + healthcheck: + test: ["CMD-SHELL", "curl -s -k http://localhost:8220/api/status | grep -q 'HEALTHY'"] + retries: 300 + interval: 10s platform: ${stackPlatform:-linux/amd64} ports: - "${fleetServerPort:-8220}:8220" diff --git a/internal/config/compose/services/elastic-agent/fleet-server/docker-compose.yml b/internal/config/compose/services/elastic-agent/fleet-server/docker-compose.yml index cf693b6f59..8108da7b22 100644 --- a/internal/config/compose/services/elastic-agent/fleet-server/docker-compose.yml +++ b/internal/config/compose/services/elastic-agent/fleet-server/docker-compose.yml @@ -1,7 +1,7 @@ version: '2.4' services: fleet-server: - image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-55d181cf-SNAPSHOT}" + image: "docker.elastic.co/${elasticAgentDockerNamespace:-beats}/elastic-agent${elasticAgentDockerImageSuffix}:${elasticAgentTag:-8.6.0-2d1af9b1-SNAPSHOT}" depends_on: elasticsearch: condition: service_healthy diff --git a/internal/config/compose/services/elasticsearch/docker-compose.yml b/internal/config/compose/services/elasticsearch/docker-compose.yml index 5a58263f27..2e39ee75f5 100644 --- a/internal/config/compose/services/elasticsearch/docker-compose.yml +++ b/internal/config/compose/services/elasticsearch/docker-compose.yml @@ -9,7 +9,11 @@ services: - xpack.monitoring.collection.enabled=true - ELASTIC_USERNAME=elastic - ELASTIC_PASSWORD=changeme - image: "docker.elastic.co/observability-ci/elasticsearch:${elasticsearchTag:-8.6.0-55d181cf-SNAPSHOT}" + image: "docker.elastic.co/observability-ci/elasticsearch:${elasticsearchTag:-8.6.0-2d1af9b1-SNAPSHOT}" + healthcheck: + interval: 10s + retries: 100 + test: ["CMD-SHELL", "curl -u admin:changeme -s http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=500ms"] platform: ${elasticsearchPlatform:-linux/amd64} ports: - "9200:9200" diff --git a/internal/config/compose/services/kibana/docker-compose.yml b/internal/config/compose/services/kibana/docker-compose.yml index d1af4b6375..27d75a5985 100644 --- a/internal/config/compose/services/kibana/docker-compose.yml +++ b/internal/config/compose/services/kibana/docker-compose.yml @@ -6,9 +6,9 @@ services: - ELASTIC_USERNAME=elastic - ELASTIC_PASSWORD=changeme healthcheck: - test: "curl -f http://localhost:5601/login | grep kbn-injected-metadata 2>&1 >/dev/null" - retries: 600 - interval: 1s - image: "docker.elastic.co/kibana/kibana:${kibanaTag:-8.6.0-55d181cf-SNAPSHOT}" + test: ["CMD-SHELL", "curl -u admin:changeme -s http://localhost:5601/api/status | grep -q 'All services are available'"] + retries: 60 + interval: 10s + image: "docker.elastic.co/kibana/kibana:${kibanaTag:-8.6.0-2d1af9b1-SNAPSHOT}" ports: - "5601:5601" diff --git a/internal/config/compose/services/metricbeat/docker-compose.yml b/internal/config/compose/services/metricbeat/docker-compose.yml index b42b14a740..8d08c91e73 100644 --- a/internal/config/compose/services/metricbeat/docker-compose.yml +++ b/internal/config/compose/services/metricbeat/docker-compose.yml @@ -14,7 +14,7 @@ services: ] environment: - BEAT_STRICT_PERMS=${beatStricPerms:-false} - image: "docker.elastic.co/${metricbeatDockerNamespace:-beats}/metricbeat:${metricbeatTag:-8.6.0-55d181cf-SNAPSHOT}" + image: "docker.elastic.co/${metricbeatDockerNamespace:-beats}/metricbeat:${metricbeatTag:-8.6.0-2d1af9b1-SNAPSHOT}" labels: 
co.elastic.logs/module: "${serviceName}" platform: ${stackPlatform:-linux/amd64} diff --git a/internal/config/config.go b/internal/config/config.go index f6b69bd4be..7772c06869 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -255,7 +255,13 @@ func newConfig(workspace string) { } // initialize included profiles/services - extractProfileServiceConfig(Op, box) + err := extractProfileServiceConfig(Op, box) + if err != nil { + log.WithFields(log.Fields{ + "workspace": workspace, + }).Error("Could not extract packaged compose files") + return + } // add file system services and profiles readFilesFromFileSystem("services") diff --git a/internal/config/kubernetes/base/elasticsearch/deployment.yaml b/internal/config/kubernetes/base/elasticsearch/deployment.yaml index 0766f4a4f6..a52859de1e 100644 --- a/internal/config/kubernetes/base/elasticsearch/deployment.yaml +++ b/internal/config/kubernetes/base/elasticsearch/deployment.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - name: elasticsearch - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-55d181cf-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-2d1af9b1-SNAPSHOT envFrom: - configMapRef: name: elasticsearch-config diff --git a/internal/config/kubernetes/base/fleet-server/deployment.yaml b/internal/config/kubernetes/base/fleet-server/deployment.yaml index 2304e04c03..9fd6827afa 100644 --- a/internal/config/kubernetes/base/fleet-server/deployment.yaml +++ b/internal/config/kubernetes/base/fleet-server/deployment.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - name: fleet-server - image: docker.elastic.co/beats/elastic-agent:8.6.0-55d181cf-SNAPSHOT + image: docker.elastic.co/beats/elastic-agent:8.6.0-2d1af9b1-SNAPSHOT env: - name: FLEET_SERVER_ENABLE value: "1" diff --git a/internal/config/kubernetes/base/kibana/deployment.yaml b/internal/config/kubernetes/base/kibana/deployment.yaml index 3b323b44d4..806e439532 100644 --- a/internal/config/kubernetes/base/kibana/deployment.yaml +++ b/internal/config/kubernetes/base/kibana/deployment.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - name: kibana - image: docker.elastic.co/kibana/kibana:8.6.0-55d181cf-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-2d1af9b1-SNAPSHOT env: - name: ELASTICSEARCH_URL value: http://elasticsearch:9200 diff --git a/internal/deploy/compose.go b/internal/deploy/compose.go index 0b7a8f8f6e..6efe77b36e 100644 --- a/internal/deploy/compose.go +++ b/internal/deploy/compose.go @@ -86,6 +86,8 @@ func (sm *DockerServiceManager) ExecCommandInService(ctx context.Context, profil if detach { composeArgs = append(composeArgs, "-d") } + composeArgs = append(composeArgs, "--pull") + composeArgs = append(composeArgs, "--quiet-pull") composeArgs = append(composeArgs, "--index", fmt.Sprintf("%d", image.Scale)) composeArgs = append(composeArgs, serviceName) composeArgs = append(composeArgs, cmds...) 
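The CMD-SHELL healthchecks introduced above can be exercised by hand before relying on them in compose; a sketch, assuming the stack is already listening on localhost with the admin:changeme credentials used in these files (note the quotes around the cluster-health URL, which keep the shell from treating the & as a control operator):

    curl -u admin:changeme -s 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=500ms'
    curl -u admin:changeme -s http://localhost:5601/api/status | grep -q 'All services are available' && echo 'kibana is healthy'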
diff --git a/internal/deploy/docker.go b/internal/deploy/docker.go index 984584d3ca..4338049728 100644 --- a/internal/deploy/docker.go +++ b/internal/deploy/docker.go @@ -177,7 +177,7 @@ func (c *dockerDeploymentManifest) Logs(ctx context.Context, service ServiceRequ defer span.End() manifest, _ := c.GetServiceManifest(ctx, service) - _, err := shell.Execute(ctx, ".", "docker", "logs", manifest.Name) + logs, err := shell.Execute(ctx, ".", "docker", "logs", manifest.Name) if err != nil { log.WithFields(log.Fields{ "error": err, @@ -186,6 +186,8 @@ func (c *dockerDeploymentManifest) Logs(ctx context.Context, service ServiceRequ return err } + // print logs as is, including tabs and line breaks + fmt.Println(logs) return nil } diff --git a/internal/deploy/docker_client.go b/internal/deploy/docker_client.go index f50cf39d1a..d27d2ac4ed 100644 --- a/internal/deploy/docker_client.go +++ b/internal/deploy/docker_client.go @@ -68,7 +68,16 @@ func buildTarForDeployment(file *os.File) (bytes.Buffer, error) { return bytes.Buffer{}, err } - tarWriter.Write(b) + _, err = tarWriter.Write(b) + if err != nil { + log.WithFields(log.Fields{ + "fileInfoName": fileInfo.Name(), + "size": fileInfo.Size(), + "error": err, + }).Error("Could not write TAR file") + return bytes.Buffer{}, fmt.Errorf("could not write TAR file: %v", err) + } + defer tarWriter.Close() return buffer, nil @@ -124,7 +133,10 @@ func CopyFileToContainer(ctx context.Context, containerName string, srcPath stri return err } - writer.Write(b) + _, err = writer.Write(b) + if err != nil { + return err + } } err = dockerClient.CopyToContainer(ctx, containerName, parentDir, &buffer, types.CopyToContainerOptions{AllowOverwriteDirWithFile: true}) @@ -399,7 +411,14 @@ func LoadImage(imagePath string) error { } buf := new(bytes.Buffer) - buf.ReadFrom(imageLoadResponse.Body) + _, err = buf.ReadFrom(imageLoadResponse.Body) + if err != nil { + log.WithFields(log.Fields{ + "error": err, + "image": fileNamePath, + }).Error("Could not read the Docker image load response.") + return err + } log.WithFields(log.Fields{ "image": fileNamePath, diff --git a/internal/deploy/elastic_package.go b/internal/deploy/elastic_package.go index d6fe0d3888..3f529c0d9e 100644 --- a/internal/deploy/elastic_package.go +++ b/internal/deploy/elastic_package.go @@ -276,7 +276,7 @@ func (ep *EPServiceManager) Logs(ctx context.Context, service ServiceRequest) er defer span.End() manifest, _ := ep.GetServiceManifest(context.Background(), service) - _, err := shell.Execute(ep.Context, ".", "docker", "logs", manifest.Name) + logs, err := shell.Execute(ep.Context, ".", "docker", "logs", manifest.Name) if err != nil { log.WithFields(log.Fields{ "error": err, @@ -285,6 +285,8 @@ func (ep *EPServiceManager) Logs(ctx context.Context, service ServiceRequest) er return err } + // print logs as is, including tabs and line breaks + fmt.Println(logs) return nil } diff --git a/internal/deploy/kubernetes.go b/internal/deploy/kubernetes.go index f7ecacd3dd..bf41105bcc 100644 --- a/internal/deploy/kubernetes.go +++ b/internal/deploy/kubernetes.go @@ -184,7 +184,7 @@ func (c *kubernetesDeploymentManifest) Logs(ctx context.Context, service Service defer span.End() kubectl = cluster.Kubectl().WithNamespace(ctx, "default") - _, err := kubectl.Run(ctx, "logs", "deployment/"+service.Name) + logs, err := kubectl.Run(ctx, "logs", "deployment/"+service.Name) if err != nil { log.WithFields(log.Fields{ "error": err, @@ -193,6 +193,8 @@ func (c *kubernetesDeploymentManifest) Logs(ctx context.Context, service 
Service return err } + // print logs as is, including tabs and line breaks + fmt.Println(logs) return nil } diff --git a/internal/installer/elasticagent_tar.go b/internal/installer/elasticagent_tar.go index 97573b5aa1..b22ee258bd 100644 --- a/internal/installer/elasticagent_tar.go +++ b/internal/installer/elasticagent_tar.go @@ -177,7 +177,8 @@ func (i *elasticAgentTARPackage) Preinstall(ctx context.Context) error { } srcPath := common.GetElasticAgentWorkingPath(fmt.Sprintf("%s-%s-%s-%s", artifact, downloads.GetSnapshotVersion(version), metadata.Os, metadata.Arch)) - output, _ := i.Exec(ctx, []string{"mv", srcPath, common.GetElasticAgentWorkingPath(artifact)}) + _, _ = i.Exec(ctx, []string{"rm", "-fr", common.GetElasticAgentWorkingPath(artifact)}) + output, _ := i.Exec(ctx, []string{"mv", "-f", srcPath, common.GetElasticAgentWorkingPath(artifact)}) log.WithFields(log.Fields{ "output": output, "artifact": artifact, @@ -249,7 +250,7 @@ func (i *elasticAgentTARPackage) Stop(ctx context.Context) error { // Uninstall uninstalls a TAR package func (i *elasticAgentTARPackage) Uninstall(ctx context.Context) error { - cmds := []string{"elastic-agent", "uninstall", "-f"} + cmds := []string{"/opt/Elastic/Agent/elastic-agent", "uninstall", "-f"} span, _ := apm.StartSpanOptions(ctx, "Uninstalling Elastic Agent", "elastic-agent.tar.uninstall", apm.SpanOptions{ Parent: apm.SpanFromContext(ctx).TraceContext(), }) diff --git a/internal/installer/elasticagent_tar_macos.go b/internal/installer/elasticagent_tar_macos.go index 193059e9f6..8bd3610e50 100644 --- a/internal/installer/elasticagent_tar_macos.go +++ b/internal/installer/elasticagent_tar_macos.go @@ -178,7 +178,8 @@ func (i *elasticAgentTARDarwinPackage) Preinstall(ctx context.Context) error { } srcPath := common.GetElasticAgentWorkingPath(fmt.Sprintf("%s-%s-%s-%s", artifact, downloads.GetSnapshotVersion(version), metadata.Os, metadata.Arch)) - output, _ := i.Exec(ctx, []string{"mv", srcPath, common.GetElasticAgentWorkingPath("elastic-agent")}) + _, _ = i.Exec(ctx, []string{"rm", "-fr", common.GetElasticAgentWorkingPath("elastic-agent")}) + output, _ := i.Exec(ctx, []string{"mv", "-f", srcPath, common.GetElasticAgentWorkingPath("elastic-agent")}) log.WithField("output", output).Trace("Moved elastic-agent") return nil } diff --git a/internal/installer/elasticagent_zip.go b/internal/installer/elasticagent_zip.go index 713e362f54..9e50e8bd88 100644 --- a/internal/installer/elasticagent_zip.go +++ b/internal/installer/elasticagent_zip.go @@ -226,7 +226,10 @@ func extractZIPFile(src string, target string) error { filePath := filepath.Join(target, f.Name) if f.FileInfo().IsDir() { - os.MkdirAll(filePath, os.ModePerm) + err := os.MkdirAll(filePath, os.ModePerm) + if err != nil { + return err + } continue } diff --git a/internal/kibana/server.go b/internal/kibana/server.go index 249cc9e4df..9b0e5bfed3 100644 --- a/internal/kibana/server.go +++ b/internal/kibana/server.go @@ -333,14 +333,14 @@ func (c *Client) WaitForReady(ctx context.Context, maxTimeoutMinutes time.Durati }) defer span.End() - statusCode, respBody, err := c.get(ctx, "status") + statusCode, respBody, err := c.get(ctx, "api/status") if err != nil { log.WithFields(log.Fields{ "error": err, "statusCode": statusCode, "respBody": string(respBody), "retry": retryCount, - "statusEndpoint": fmt.Sprintf("%s/status", BaseURL), + "statusEndpoint": fmt.Sprintf("%s/api/status", BaseURL), "elapsedTime": exp.GetElapsedTime(), }).Warn("The Kibana instance is not healthy yet") @@ -349,9 +349,42 @@ func (c 
diff --git a/internal/kibana/server.go b/internal/kibana/server.go
index 249cc9e4df..9b0e5bfed3 100644
--- a/internal/kibana/server.go
+++ b/internal/kibana/server.go
@@ -333,14 +333,14 @@ func (c *Client) WaitForReady(ctx context.Context, maxTimeoutMinutes time.Durati
 		})
 		defer span.End()
 
-		statusCode, respBody, err := c.get(ctx, "status")
+		statusCode, respBody, err := c.get(ctx, "api/status")
 		if err != nil {
 			log.WithFields(log.Fields{
 				"error":          err,
 				"statusCode":     statusCode,
 				"respBody":       string(respBody),
 				"retry":          retryCount,
-				"statusEndpoint": fmt.Sprintf("%s/status", BaseURL),
+				"statusEndpoint": fmt.Sprintf("%s/api/status", BaseURL),
 				"elapsedTime":    exp.GetElapsedTime(),
 			}).Warn("The Kibana instance is not healthy yet")
 
@@ -349,9 +349,42 @@ func (c *Client) WaitForReady(ctx context.Context, maxTimeoutMinutes time.Durati
 			return err
 		}
 
+		jsonResponse, err := gabs.ParseJSON(respBody)
+		if err != nil {
+			log.WithFields(log.Fields{
+				"error":          err,
+				"statusCode":     statusCode,
+				"respBody":       string(respBody),
+				"retry":          retryCount,
+				"statusEndpoint": fmt.Sprintf("%s/api/status", BaseURL),
+				"elapsedTime":    exp.GetElapsedTime(),
+			}).Warn("The Kibana instance is not available yet")
+
+			retryCount++
+
+			return err
+		}
+
+		status := jsonResponse.Path("status.overall.level").Data().(string)
+		if status != "available" {
+			err := errors.New("Kibana is not available yet")
+			log.WithFields(log.Fields{
+				"error":          err,
+				"statusCode":     statusCode,
+				"respBody":       status,
+				"retry":          retryCount,
+				"statusEndpoint": fmt.Sprintf("%s/api/status", BaseURL),
+				"elapsedTime":    exp.GetElapsedTime(),
+			}).Warn("The Kibana instance is not available yet :" + status)
+
+			retryCount++
+
+			return err
+		}
+
 		log.WithFields(log.Fields{
 			"retries":        retryCount,
-			"statusEndpoint": fmt.Sprintf("%s/status", BaseURL),
+			"statusEndpoint": fmt.Sprintf("%s/api/status", BaseURL),
 			"elapsedTime":    exp.GetElapsedTime(),
 		}).Info("The Kibana instance is healthy")
diff --git a/internal/utils/retry.go b/internal/utils/retry.go
index c1ce828388..6306967cb5 100644
--- a/internal/utils/retry.go
+++ b/internal/utils/retry.go
@@ -22,10 +22,10 @@ func init() {
 
 // GetExponentialBackOff returns a preconfigured exponential backoff instance
 func GetExponentialBackOff(elapsedTime time.Duration) *backoff.ExponentialBackOff {
 	var (
-		initialInterval     = 500 * time.Millisecond
+		initialInterval     = 10 * time.Second
 		randomizationFactor = 0.5
 		multiplier          = 2.0
-		maxInterval         = 5 * time.Second
+		maxInterval         = 30 * time.Second
 		maxElapsedTime      = elapsedTime
 	)
diff --git a/internal/utils/utils.go b/internal/utils/utils.go
index 13c28262c8..573869a2b5 100644
--- a/internal/utils/utils.go
+++ b/internal/utils/utils.go
@@ -51,7 +51,14 @@ func DownloadFile(downloadRequest *DownloadRequest) error {
 	var filePath string
 	if downloadRequest.DownloadPath == "" {
 		tempParentDir := filepath.Join(os.TempDir(), uuid.NewString())
-		internalio.MkdirAll(tempParentDir)
+		err := internalio.MkdirAll(tempParentDir)
+		if err != nil {
+			log.WithFields(log.Fields{
+				"error": err,
+				"path":  tempParentDir,
+			}).Error("Error creating directory")
+			return err
+		}
 		filePath = filepath.Join(tempParentDir, uuid.NewString())
 		downloadRequest.DownloadPath = filePath
 	} else {
diff --git a/pkg/downloads/buckets.go b/pkg/downloads/buckets.go
index 8b94a07914..e47878b899 100644
--- a/pkg/downloads/buckets.go
+++ b/pkg/downloads/buckets.go
@@ -177,7 +177,7 @@ func (r *ProjectURLResolver) Resolve() (string, string, string) {
 		"object":  r.FileName,
 		"prefix":  prefix,
 		"project": artifact,
-	}).Debug("Resolving URL from Project resolver")
+	}).Info("Resolving URL from Project resolver")
 
 	return r.Bucket, prefix, r.FileName
 }
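The new readiness check stops trusting a bare HTTP 200 from Kibana and inspects the JSON body instead: GET /api/status reports status.overall.level, and only "available" counts as healthy. A self-contained sketch of that check with github.com/Jeffail/gabs/v2 (the parser the hunk itself calls), using a checked type assertion where the patch uses an unchecked one; the function name and payload shape are assumptions for illustration:

    package main

    import (
    	"errors"

    	"github.com/Jeffail/gabs/v2"
    )

    // kibanaAvailable returns nil only when the /api/status payload
    // reports {"status":{"overall":{"level":"available"}}}.
    func kibanaAvailable(respBody []byte) error {
    	jsonResponse, err := gabs.ParseJSON(respBody)
    	if err != nil {
    		return err
    	}
    	level, ok := jsonResponse.Path("status.overall.level").Data().(string)
    	if !ok || level != "available" {
    		return errors.New("Kibana is not available yet")
    	}
    	return nil
    }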
diff --git a/pkg/downloads/releases.go b/pkg/downloads/releases.go
index 1a3339e176..37c4078ca9 100644
--- a/pkg/downloads/releases.go
+++ b/pkg/downloads/releases.go
@@ -193,7 +193,7 @@ func (r *ReleaseURLResolver) Resolve() (string, string, error) {
 				"retry":          retryCount,
 				"statusEndpoint": r.URL,
 				"elapsedTime":    exp.GetElapsedTime(),
-			}).Warn("Download could not be found at the Elastic downloads API")
+			}).Debug("Download could not be found at the Elastic downloads API")
 
 			return nil
 		}
@@ -203,7 +203,7 @@ func (r *ReleaseURLResolver) Resolve() (string, string, error) {
 			"retry":          retryCount,
 			"statusEndpoint": r.URL,
 			"elapsedTime":    exp.GetElapsedTime(),
-		}).Warn("The Elastic downloads API is not available yet")
+		}).Debug("The Elastic downloads API is not available yet")
 
 		retryCount++
 
@@ -215,7 +215,7 @@ func (r *ReleaseURLResolver) Resolve() (string, string, error) {
 		"retries":        retryCount,
 		"statusEndpoint": r.URL,
 		"elapsedTime":    exp.GetElapsedTime(),
-	}).Debug("Download was found in the Elastic downloads API")
+	}).Info("Download was found in the Elastic downloads API")
 
 	return nil
 }
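These resolvers retry against the downloads API using the exponential backoff tuned in internal/utils/retry.go above, so the quieter Debug level avoids flooding the logs on each poll. A sketch of the equivalent configuration, assuming github.com/cenkalti/backoff/v4 is the library GetExponentialBackOff wraps (the field names below belong to that package; treat the import as an assumption):

    package main

    import (
    	"time"

    	backoff "github.com/cenkalti/backoff/v4"
    )

    // getExponentialBackOff mirrors the tuned values from the patch:
    // start at 10s instead of 500ms and cap intervals at 30s instead of 5s,
    // since polling a remote artifacts API more often rarely helps.
    func getExponentialBackOff(elapsedTime time.Duration) *backoff.ExponentialBackOff {
    	exp := backoff.NewExponentialBackOff()
    	exp.InitialInterval = 10 * time.Second
    	exp.RandomizationFactor = 0.5
    	exp.Multiplier = 2.0
    	exp.MaxInterval = 30 * time.Second
    	exp.MaxElapsedTime = elapsedTime
    	return exp
    }

A caller would pass this to backoff.Retry together with an operation closure that bumps retryCount, which matches how the Resolve functions above log "retry" and "elapsedTime" on every attempt.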