From b93bf2914234ef83c848b9977b2905eb07dc35da Mon Sep 17 00:00:00 2001 From: Yadnesh Kulkarni Date: Thu, 4 Jul 2024 16:13:59 +0530 Subject: [PATCH] Update telemetry adoption guide --- .../modules/proc_adopting-autoscaling.adoc | 102 +++++------- ...ng-compute-services-to-the-data-plane.adoc | 2 + .../proc_adopting-telemetry-services.adoc | 149 +++++++++++------- .../proc_deploying-backend-services.adoc | 2 - .../proc_stopping-openstack-services.adoc | 13 +- tests/playbooks/test_minimal.yaml | 6 + tests/playbooks/test_rollback_minimal.yaml | 6 + tests/playbooks/test_rollback_with_ceph.yaml | 6 + tests/playbooks/test_with_ceph.yaml | 6 + .../roles/autoscaling_adoption/meta/main.yaml | 2 + .../autoscaling_adoption/tasks/main.yaml | 42 +++++ .../roles/backend_services/defaults/main.yaml | 2 + tests/roles/backend_services/tasks/main.yaml | 6 + .../roles/dataplane_adoption/tasks/main.yaml | 2 + tests/roles/heat_adoption/tasks/main.yaml | 2 + .../stop_openstack_services/tasks/main.yaml | 13 +- tests/roles/telemetry_adoption/meta/main.yaml | 2 + .../roles/telemetry_adoption/tasks/main.yaml | 110 +++++++++++++ tests/secrets.sample.yaml | 2 + 19 files changed, 353 insertions(+), 122 deletions(-) create mode 100644 tests/roles/autoscaling_adoption/meta/main.yaml create mode 100644 tests/roles/autoscaling_adoption/tasks/main.yaml create mode 100644 tests/roles/telemetry_adoption/meta/main.yaml create mode 100644 tests/roles/telemetry_adoption/tasks/main.yaml diff --git a/docs_user/modules/proc_adopting-autoscaling.adoc b/docs_user/modules/proc_adopting-autoscaling.adoc index 0dfbc6c92..ad5c84515 100644 --- a/docs_user/modules/proc_adopting-autoscaling.adoc +++ b/docs_user/modules/proc_adopting-autoscaling.adoc @@ -1,6 +1,6 @@ [id="adopting-autoscaling_{context}"] -= Adopting autoscaling += Adopting Autoscaling services Adopting autoscaling means that an existing `OpenStackControlPlane` custom resource (CR), where Aodh services are supposed to be disabled, should be patched to start the service with the configuration parameters provided by the source environment. @@ -20,48 +20,22 @@ should be already adopted. . Patch the `OpenStackControlPlane` CR to deploy autoscaling services: + ---- -cat << EOF > aodh_patch.yaml +oc patch openstackcontrolplane openstack --type=merge --patch ' spec: - autoscaling: + telemetry: enabled: true - prometheus: - deployPrometheus: false - aodh: - customServiceConfig: | - [DEFAULT] - debug=true - secret: osp-secret -ifeval::["{build}" != "downstream"] - apiImage: "quay.io/podified-antelope-centos9/openstack-aodh-api:current-podified" - evaluatorImage: "quay.io/podified-antelope-centos9/openstack-aodh-evaluator:current-podified" - notifierImage: "quay.io/podified-antelope-centos9/openstack-aodh-notifier:current-podified" - listenerImage: "quay.io/podified-antelope-centos9/openstack-aodh-listener:current-podified" -endif::[] -ifeval::["{build}" == "downstream"] - apiImage: "registry.redhat.io/rhosp-dev-preview/openstack-aodh-api-rhel9:18.0" - evaluatorImage: "registry.redhat.io/rhosp-dev-preview/openstack-aodh-evaluator-rhel9:18.0" - notifierImage: "registry.redhat.io/rhosp-dev-preview/openstack-aodh-notifier-rhel9:18.0" - listenerImage: "registry.redhat.io/rhosp-dev-preview/openstack-aodh-listener-rhel9:18.0" -endif::[] - passwordSelectors: - databaseAccount: aodh - databaseInstance: openstack - memcachedInstance: memcached -EOF ----- - -. Optional: If you have previously backed up your {OpenStackShort} services configuration file from the old environment, you can use os-diff to compare and make sure the configuration is correct. This will producre the difference between both ini configuration files: -+ ----- -os-diff diff /tmp/collect_tripleo_configs/aodh/etc/aodh/aodh.conf aodh_patch.yaml --crd ----- -+ -For more information, see xref:reviewing-the-openstack-control-plane-configuration_{context}[Reviewing the {rhos_prev_long} control plane configuration]. - -. Patch the `OpenStackControlPlane` CR to deploy Aodh services: -+ ----- -oc patch openstackcontrolplane openstack --type=merge --patch-file aodh_patch.yaml + template: + autoscaling: + enabled: true + aodh: + passwordSelector: + aodhService: AodhPassword + databaseAccount: aodh + databaseInstance: openstack + secret: osp-secret + serviceUser: aodh + heatInstance: heat +' ---- .Verification @@ -69,7 +43,7 @@ oc patch openstackcontrolplane openstack --type=merge --patch-file aodh_patch.ya . If autoscaling services are enabled, inspect Aodh pods: + ---- -AODH_POD=`oc get pods -l service=aodh | tail -n 1 | cut -f 1 -d' '` +AODH_POD=`oc get pods -l service=aodh -n openstack | tail -n 1 | cut -f 1 -d' '` oc exec -t $AODH_POD -c aodh-api -- cat /etc/aodh/aodh.conf ---- @@ -77,30 +51,34 @@ oc exec -t $AODH_POD -c aodh-api -- cat /etc/aodh/aodh.conf + ---- openstack endpoint list | grep aodh -| 6a805bd6c9f54658ad2f24e5a0ae0ab6 | regionOne | aodh | network | True | public | http://aodh-public-openstack.apps-crc.testing | -| b943243e596847a9a317c8ce1800fa98 | regionOne | aodh | network | True | internal | http://aodh-internal.openstack.svc:9696 | -| f97f2b8f7559476bb7a5eafe3d33cee7 | regionOne | aodh | network | True | admin | http://192.168.122.99:9696 | +| d05d120153cd4f9b8310ac396b572926 | regionOne | aodh | alarming | True | internal | http://aodh-internal.openstack.svc:8042 | +| d6daee0183494d7a9a5faee681c79046 | regionOne | aodh | alarming | True | public | http://aodh-public.openstack.svc:8042 | ---- -. Create sample resources. You can test whether you can create alarms: +.Autoscaling template adoption + +* `PrometheusAlarm` alarm type must be used instead of `GnocchiAggregationByResourcesAlarm` + +* Create Aodh alarms of type prometheus + ---- -openstack alarm create \ ---name low_alarm \ ---type gnocchi_resources_threshold \ ---metric cpu \ ---resource-id b7ac84e4-b5ca-4f9e-a15c-ece7aaf68987 \ ---threshold 35000000000 \ ---comparison-operator lt \ ---aggregation-method rate:mean \ ---granularity 300 \ +openstack alarm create --name high_cpu_alarm \ +--type prometheus \ +--query "(rate(ceilometer_cpu{resource_name=~'cirros'})) * 100" \ +--alarm-action 'log://' \ +--granularity 15 \ --evaluation-periods 3 \ ---alarm-action 'log:\\' \ ---ok-action 'log:\\' \ ---resource-type instance +--comparison-operator gt \ +--threshold 7000000000 ---- -//=== (TODO) - -//* Include adopted autoscaling heat templates -//* Include adopted Aodh alarm create commands of type prometheus +* Verify the state of alarm ++ +---- +openstack alarm list ++--------------------------------------+------------+------------------+-------------------+----------+ +| alarm_id | type | name | state | severity | enabled | ++--------------------------------------+------------+------------------+-------------------+----------+ +| 209dc2e9-f9d6-40e5-aecc-e767ce50e9c0 | prometheus | prometheus_alarm | ok | low | True | ++--------------------------------------+------------+------------------+-------------------+----------+ +---- diff --git a/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc b/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc index 705b24140..312dd5b6b 100644 --- a/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc +++ b/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc @@ -261,6 +261,7 @@ spec: - nova - ovn - neutron-metadata + - telemetry env: - name: ANSIBLE_CALLBACKS_ENABLED value: "profile_tasks" @@ -426,6 +427,7 @@ spec: - nova - ovn - neutron-metadata + - telemetry nodeTemplate: extraMounts: - extraVolType: Ceph diff --git a/docs_user/modules/proc_adopting-telemetry-services.adoc b/docs_user/modules/proc_adopting-telemetry-services.adoc index e5e1b874a..2504cd832 100644 --- a/docs_user/modules/proc_adopting-telemetry-services.adoc +++ b/docs_user/modules/proc_adopting-telemetry-services.adoc @@ -14,56 +14,80 @@ This guide also assumes that: * Previous Adoption steps completed. MariaDB, the {identity_service_first_ref} and the data plane should be already adopted. //kgilliga:Should this procedure be moved after the "Adopting the data plane" chapter? -.Procedure - -. Patch the `OpenStackControlPlane` CR to deploy Ceilometer services: -// TODO(jistr): There are still some quay.io images in the downstream build. +* Patch the `OpenStackControlPlane` CR to deploy `cluster-observability-operator`: + ---- -cat << EOF > ceilometer_patch.yaml +oc create -f - < polling.yaml ---- -sources: - - name: pollsters - interval: 300 - meters: - - volume.size - - image.size - - cpu - - memory -EOF - -oc patch secret ceilometer-config-data --patch="{\"data\": { \"polling.yaml\": \"$(base64 -w0 polling.yaml)\"}}" +oc patch openstackcontrolplane openstack --type=merge --patch ' +spec: + telemetry: + template: + logging: + enabled: false + ipaddr: 172.17.0.80 + port: 10514 + cloNamespace: openshift-logging +' ---- diff --git a/docs_user/modules/proc_deploying-backend-services.adoc b/docs_user/modules/proc_deploying-backend-services.adoc index f06338352..5eeb7fedd 100644 --- a/docs_user/modules/proc_deploying-backend-services.adoc +++ b/docs_user/modules/proc_deploying-backend-services.adoc @@ -53,7 +53,6 @@ For example, in developer environments with {OpenStackPreviousInstaller} Standal ---- AODH_PASSWORD=$(cat ~/tripleo-standalone-passwords.yaml | grep ' AodhPassword:' | awk -F ': ' '{ print $2; }') BARBICAN_PASSWORD=$(cat ~/tripleo-standalone-passwords.yaml | grep ' BarbicanPassword:' | awk -F ': ' '{ print $2; }') -CEILOMETER_METERING_SECRET=$(cat ~/tripleo-standalone-passwords.yaml | grep ' CeilometerMeteringSecret:' | awk -F ': ' '{ print $2; }') CEILOMETER_PASSWORD=$(cat ~/tripleo-standalone-passwords.yaml | grep ' CeilometerPassword:' | awk -F ': ' '{ print $2; }') CINDER_PASSWORD=$(cat ~/tripleo-standalone-passwords.yaml | grep ' CinderPassword:' | awk -F ': ' '{ print $2; }') GLANCE_PASSWORD=$(cat ~/tripleo-standalone-passwords.yaml | grep ' GlancePassword:' | awk -F ': ' '{ print $2; }') @@ -105,7 +104,6 @@ account passwords from the original deployment: ---- $ oc set data secret/osp-secret "AodhPassword=$AODH_PASSWORD" $ oc set data secret/osp-secret "BarbicanPassword=$BARBICAN_PASSWORD" -$ oc set data secret/osp-secret "CeilometerMeteringSecret=$CEILOMETER_METERING_SECRET" $ oc set data secret/osp-secret "CeilometerPassword=$CEILOMETER_PASSWORD" $ oc set data secret/osp-secret "CinderPassword=$CINDER_PASSWORD" $ oc set data secret/osp-secret "GlancePassword=$GLANCE_PASSWORD" diff --git a/docs_user/modules/proc_stopping-openstack-services.adoc b/docs_user/modules/proc_stopping-openstack-services.adoc index 195887239..c22fa387e 100644 --- a/docs_user/modules/proc_stopping-openstack-services.adoc +++ b/docs_user/modules/proc_stopping-openstack-services.adoc @@ -57,7 +57,14 @@ sudo pcs constraint remove order-ceph-nfs-openstack-manila-share-Optional + ---- # Update the services list to be stopped -ServicesToStop=("tripleo_horizon.service" +ServicesToStop=("tripleo_aodh_api.service" + "tripleo_aodh_api_cron.service" + "tripleo_aodh_evaluator.service" + "tripleo_aodh_listener.service" + "tripleo_aodh_notifier.service" + "tripleo_ceilometer_agent_central.service" + "tripleo_ceilometer_agent_notification.service" + "tripleo_horizon.service" "tripleo_keystone.service" "tripleo_barbican_api.service" "tripleo_barbican_worker.service" @@ -67,7 +74,11 @@ ServicesToStop=("tripleo_horizon.service" "tripleo_cinder_scheduler.service" "tripleo_cinder_volume.service" "tripleo_cinder_backup.service" + "tripleo_collectd.service" "tripleo_glance_api.service" + "tripleo_gnocchi_api.service" + "tripleo_gnocchi_metricd.service" + "tripleo_gnocchi_statsd.service" "tripleo_manila_api.service" "tripleo_manila_api_cron.service" "tripleo_manila_scheduler.service" diff --git a/tests/playbooks/test_minimal.yaml b/tests/playbooks/test_minimal.yaml index 6ce2229fc..1e615ce30 100644 --- a/tests/playbooks/test_minimal.yaml +++ b/tests/playbooks/test_minimal.yaml @@ -61,6 +61,12 @@ - role: heat_adoption tags: - heat_adoption + - role: telemetry_adoption + tags: + - telemetry_adoption + - role: autoscaling_adoption + tags: + - autoscaling_adoption - role: stop_remaining_services tags: - stop_remaining_services diff --git a/tests/playbooks/test_rollback_minimal.yaml b/tests/playbooks/test_rollback_minimal.yaml index 4810bb82a..000e1d3ee 100644 --- a/tests/playbooks/test_rollback_minimal.yaml +++ b/tests/playbooks/test_rollback_minimal.yaml @@ -57,6 +57,12 @@ - role: heat_adoption tags: - heat_adoption + - role: telemetry_adoption + tags: + - telemetry_adoption + - role: autoscaling_adoption + tags: + - autoscaling_adoption - role: stop_remaining_services tags: - stop_remaining_services diff --git a/tests/playbooks/test_rollback_with_ceph.yaml b/tests/playbooks/test_rollback_with_ceph.yaml index e03083d59..069e3f24f 100644 --- a/tests/playbooks/test_rollback_with_ceph.yaml +++ b/tests/playbooks/test_rollback_with_ceph.yaml @@ -62,6 +62,12 @@ - role: heat_adoption tags: - heat_adoption + - role: telemetry_adoption + tags: + - telemetry_adoption + - role: autoscaling_adoption + tags: + - autoscaling_adoption - role: manila_adoption tags: - manila_adoption diff --git a/tests/playbooks/test_with_ceph.yaml b/tests/playbooks/test_with_ceph.yaml index 070fde262..cb8362370 100644 --- a/tests/playbooks/test_with_ceph.yaml +++ b/tests/playbooks/test_with_ceph.yaml @@ -66,6 +66,12 @@ - role: heat_adoption tags: - heat_adoption + - role: telemetry_adoption + tags: + - telemetry_adoption + - role: autoscaling_adoption + tags: + - autoscaling_adoption - role: manila_adoption tags: - manila_adoption diff --git a/tests/roles/autoscaling_adoption/meta/main.yaml b/tests/roles/autoscaling_adoption/meta/main.yaml new file mode 100644 index 000000000..610f184fb --- /dev/null +++ b/tests/roles/autoscaling_adoption/meta/main.yaml @@ -0,0 +1,2 @@ +dependencies: + - role: common_defaults diff --git a/tests/roles/autoscaling_adoption/tasks/main.yaml b/tests/roles/autoscaling_adoption/tasks/main.yaml new file mode 100644 index 000000000..603077954 --- /dev/null +++ b/tests/roles/autoscaling_adoption/tasks/main.yaml @@ -0,0 +1,42 @@ +- name: deploy podified Aodh + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + oc patch openstackcontrolplane openstack --type=merge --patch ' + spec: + telemetry: + enabled: true + template: + autoscaling: + enabled: true + aodh: + passwordSelector: + aodhService: AodhPassword + databaseAccount: aodh + databaseInstance: openstack + secret: osp-secret + serviceUser: aodh + heatInstance: heat + ' + +- name: wait for Aodh to start up + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + oc get pod --selector=service=aodh -o jsonpath='{.items[0].status.phase}{"\n"}' | grep Running + register: aodh_running_result + until: aodh_running_result is success + retries: 60 + delay: 2 + +- name: check that Aodh is reachable and its endpoints are defined + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + alias openstack="oc exec -t openstackclient -- openstack" + + ${BASH_ALIASES[openstack]} endpoint list | grep aodh + register: aodh_responding_result + until: aodh_responding_result is success + retries: 60 + delay: 2 diff --git a/tests/roles/backend_services/defaults/main.yaml b/tests/roles/backend_services/defaults/main.yaml index 6e45afc4b..8f83f7e18 100644 --- a/tests/roles/backend_services/defaults/main.yaml +++ b/tests/roles/backend_services/defaults/main.yaml @@ -1,7 +1,9 @@ # Service account passwords (not service DB passwords) from the # original deployment. enable_tlse: false +aodh_password: '' barbican_password: '' +ceilometer_password: '' cinder_password: '' manila_password: '' glance_password: '' diff --git a/tests/roles/backend_services/tasks/main.yaml b/tests/roles/backend_services/tasks/main.yaml index d6cdcb774..87596f79c 100644 --- a/tests/roles/backend_services/tasks/main.yaml +++ b/tests/roles/backend_services/tasks/main.yaml @@ -16,9 +16,15 @@ ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} + {% if aodh_password %} + oc set data secret/osp-secret "AodhPassword={{ aodh_password }}" + {% endif %} {% if barbican_password %} oc set data secret/osp-secret "BarbicanPassword={{ barbican_password }}" {% endif %} + {% if ceilometer_password %} + oc set data secret/osp-secret "CeilometerPassword={{ ceilometer_password }}" + {% endif %} {% if cinder_password %} oc set data secret/osp-secret "CinderPassword={{ cinder_password }}" {% endif %} diff --git a/tests/roles/dataplane_adoption/tasks/main.yaml b/tests/roles/dataplane_adoption/tasks/main.yaml index 2ced096ab..3a23072ca 100644 --- a/tests/roles/dataplane_adoption/tasks/main.yaml +++ b/tests/roles/dataplane_adoption/tasks/main.yaml @@ -163,6 +163,7 @@ {%+ endif +%} - ovn - neutron-metadata + - telemetry env: - name: ANSIBLE_CALLBACKS_ENABLED value: "profile_tasks" @@ -268,6 +269,7 @@ - nova - ovn - neutron-metadata + - telemetry nodeTemplate: extraMounts: - extraVolType: Ceph diff --git a/tests/roles/heat_adoption/tasks/main.yaml b/tests/roles/heat_adoption/tasks/main.yaml index 73366aa29..6eac6ceeb 100644 --- a/tests/roles/heat_adoption/tasks/main.yaml +++ b/tests/roles/heat_adoption/tasks/main.yaml @@ -28,6 +28,8 @@ passwordSelectors: authEncryptionKey: HeatAuthEncryptionKey service: HeatPassword + rabbitMqClusterName: rabbitmq + serviceUser: heat ' - name: wait for Heat to start up diff --git a/tests/roles/stop_openstack_services/tasks/main.yaml b/tests/roles/stop_openstack_services/tasks/main.yaml index f19ea01a7..1e95a5f7b 100644 --- a/tests/roles/stop_openstack_services/tasks/main.yaml +++ b/tests/roles/stop_openstack_services/tasks/main.yaml @@ -13,7 +13,14 @@ {{ oc_header }} {{ stop_openstack_services_shell_vars }} - ServicesToStop=("tripleo_horizon.service" + ServicesToStop=("tripleo_aodh_api.service" + "tripleo_aodh_api_cron.service" + "tripleo_aodh_evaluator.service" + "tripleo_aodh_listener.service" + "tripleo_aodh_notifier.service" + "tripleo_ceilometer_agent_central.service" + "tripleo_ceilometer_agent_notification.service" + "tripleo_horizon.service" "tripleo_keystone.service" "tripleo_barbican_api.service" "tripleo_barbican_worker.service" @@ -23,7 +30,11 @@ "tripleo_cinder_scheduler.service" "tripleo_cinder_volume.service" "tripleo_cinder_backup.service" + "tripleo_collectd.service" "tripleo_glance_api.service" + "tripleo_gnocchi_api.service" + "tripleo_gnocchi_metricd.service" + "tripleo_gnocchi_statsd.service" "tripleo_manila_api.service" "tripleo_manila_api_cron.service" "tripleo_manila_scheduler.service" diff --git a/tests/roles/telemetry_adoption/meta/main.yaml b/tests/roles/telemetry_adoption/meta/main.yaml new file mode 100644 index 000000000..610f184fb --- /dev/null +++ b/tests/roles/telemetry_adoption/meta/main.yaml @@ -0,0 +1,2 @@ +dependencies: + - role: common_defaults diff --git a/tests/roles/telemetry_adoption/tasks/main.yaml b/tests/roles/telemetry_adoption/tasks/main.yaml new file mode 100644 index 000000000..56ec08ebd --- /dev/null +++ b/tests/roles/telemetry_adoption/tasks/main.yaml @@ -0,0 +1,110 @@ +- name: deploy cluster-observability-operator + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + oc create -f - <