# Builds the provisioner image for linux/amd64 and linux/arm64 on every push.
# The image is only built (push: false); nothing is published from this workflow.
name: Build Artifacts
on:
  push:
    branches:
      - '**'

jobs:
  multiplatform_build:
    strategy:
      fail-fast: false
      matrix:
        component:
          - name: deployment-status-provisioner
            # Build context. The Dockerfile copies docker/* paths, so the
            # context has to be the repository root.
            dir: .
            file: docker/Dockerfile
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          no-cache: true
          context: ${{ matrix.component.dir }}
          file: ${{ matrix.component.file }}
          platforms: linux/amd64,linux/arm64
          push: false
          tags: ${{ matrix.component.name }}
          provenance: false
specifically when used with LiteIDE +*.out +### Vim ### +# swap +.sw[a-p] +.*.sw[a-p] +# session +Session.vim +# temporary +.netrwhist +# auto-generated tag files +tags +### VisualStudioCode ### +.vscode/* +.history +# End of https://www.gitignore.io/api/go,vim,emacs,visualstudiocode +*.iml \ No newline at end of file diff --git a/Readme.md b/Readme.md index 8b13789..488d1f7 100644 --- a/Readme.md +++ b/Readme.md @@ -1 +1,271 @@ +This guide provides information about the usage of the Deployment Status Provisioner. +Topics covered in this section: + +[[_TOC_]] + +# Overview + +Deployment Status Provisioner is a component for providing the overall service status in DP/App Deployer jobs. + +![status-provisioner](/documentation/images/status-provisioner.drawio.png) + +# Common information + +`Deployment Status Provisioner` is a component for providing the overall service status in DP/App Deployer jobs. It is +used to receive statuses from all required service resources and specify the final result to a preselected resource from +where the DP and App Deployers read the status. + +First of all, `Deployment Status Provisioner` checks readiness status of resources specified in `MONITORED_RESOURCES` +parameter. If all resources are successfully started, the status condition displays the following message: + +``` +All components are in ready status. +``` + +If some resources are not started in the allotted time, status condition contains `RESOURCE_NAME component is not ready` +message for each unready resource, where `RESOURCE_NAME` is the name of monitored resource. + +Then `Deployment Status Provisioner` checks the result of integration tests if it is necessary. If the integration tests +fail, the status condition outputs a message from the `INTEGRATION_TESTS_RESOURCE` status. If they do not complete in +the allotted time, you will see `Integration tests have not completed in INTEGRATION_TESTS_TIMEOUT seconds` message +in the status condition. 
If the integration tests complete successfully, the status condition displays +`Integration tests are successfully completed` message. + +You also can find information about monitored resources and array with failed resources in the pod logs. + +# Usage + +To use `Deployment Status Provisioner` you need to create a resource inside your Helm chart, which creates Pod with the +latest image of `Deployment Status Provisioner` and the following parameters: + +The `INITIAL_WAIT` parameter specifies the time in seconds that the `Deployment Status Provisioner` waits before starting +to check readiness status for monitored components. It is important for `upgrade` process. The default value is `30`. + +The `MONITORED_RESOURCES` parameter specifies the comma-separated list of resources that should be monitored by +`Deployment Status Provisioner`. Each resource description should consist of **two** parts separated by space: resource +kind and its name. There is ability to monitor readiness status only for the following resource kinds: + +* `DaemonSet` +* `Deployment` +* `Job` +* `StatefulSet` + +For example, if you have Stateful Set with name `consul-server`, its description should look like `StatefulSet consul-server`. +A complete example for this parameter would be `Deployment consul-backup-daemon, DaemonSet consul, StatefulSet consul-server, Job consul-server-acl-init`. +This parameter is mandatory and does not have default value. + +The `MONITORED_CUSTOM_RESOURCES` parameter specifies the comma-separated list of custom resources that should be monitored by `Deployment Status Provisioner`. Each resource description should consist of **six** or **seven** parts separated by space: + +* `group` is the group of custom resource. It is required. For example, `netcracker.com`. +* `version` is the version of custom resource. It is required. For example, `v1`. +* `plural` is the custom resource's plural name. It is required. For example, `opensearchservices`. 
+* `name` is the custom resource's name. It is required. For example, `opensearch`. +* `expression` is the JSONPath (query language for JSON) expression to get custom resource status. It is required. For example, you need to get `type` field value from the following custom resource status if `reason` field is equal to `ReconcileCycleStatus`: + + ```yaml + status: + conditions: + - lastTransitionTime: 2024-02-27 10:06:13.746985042 +0000 UTC m=+199.958634385 + message: The deployment readiness status check is successful + reason: ReconcileCycleStatus + status: 'True' + type: Successful + - lastTransitionTime: 2024-02-27 10:06:08.714381731 +0000 UTC m=+194.926031082 + message: Component pods are ready + reason: ComponentReadinessStatus + status: 'True' + type: Ready + disasterRecoveryStatus: + mode: '' + status: '' + ``` + + In that case required expression looks like `$.status.conditions[?(@.reason=='ReconcileCycleStatus')].type`. If you need to get status from a specific field (for example, `component.status`) in the following custom resource: + + ``` + apiVersion: netcracker.com/v1 + kind: ComponentService + metadata: + creationTimestamp: '2024-02-27T10:02:51Z' + generation: 1 + name: component + namespace: component-service + spec: + global: + podReadinessTimeout: 700 + waitForPodsReady: true + component: + replicas: 3 + resources: + limits: + cpu: 500m + memory: 1024Mi + requests: + cpu: 100m + memory: 1024Mi + status: Success + ``` + + you can specify `$.spec.component.status` expression. For more information, refer to [Python JSONPath Next-Generation](https://github.com/h2non/jsonpath-ng/blob/master/README.rst). + +* `successful condition` is the value that should be considered as successfully processed custom resource. It is required. For example, `Successful`. +* `failed condition` is the value that should be considered as inability to process the custom resource. It is optional. 
If it is not specified, `Deployment Status Provisioner` will try to find `successful condition` before time runs out (`CR_PROCESSING_TIMEOUT`). For example, `Failed`. + +A complete example for this parameter would be as follows: + +``` +netcracker.com v1 opensearchservices opensearch $.status.conditions[?(@.reason=='ReconcileCycleStatus')].type Successful Failed, netcracker.com v1 customservices name $.spec.status.type Ready +``` + +The `RESOURCE_TO_SET_STATUS` parameter specifies the characteristics of the resource to set the final status of the cluster. +This parameter value should consist of **four** parts separated by space: resource group, version, plural and its name. +For example, if you want to write down the status to `Job` named `consul-status-provisioner`, the value should look +like `batch v1 jobs consul-status-provisioner`. +This parameter is mandatory and does not have default value. + +The `NAMESPACE` parameter specifies the namespace in OpenShift/Kubernetes where all the monitored resources and resource +to set status are located. This parameter is mandatory and does not have default value. + +The `CONDITION_REASON` parameter specifies the name of the condition reason that is used when setting the status condition +for the `RESOURCE_TO_SET_STATUS` resource. For example, `ConsulServiceReadinessStatus`. The default value is `ServiceReadinessStatus`. + +The `SUCCESSFUL_CONDITION_TYPE` parameter specifies the condition type that is used when setting the successful status +condition for the `RESOURCE_TO_SET_STATUS` resource. For example, `Success`. The default value is `Successful`. + +The `FAILED_CONDITION_TYPE` parameter specifies the condition type that is used when setting the failed status condition +for the `RESOURCE_TO_SET_STATUS` resource. For example, `Fail`. The default value is `Failed`. 
+ +The `POD_READINESS_TIMEOUT` parameter specifies the timeout in seconds that the `Deployment Status Provisioner` waits for +each of the monitored resources to be ready or completed. The default value is `300`. + +The `CR_PROCESSING_TIMEOUT` parameter specifies the timeout in seconds the `Deployment Status Provisioner` waits for each of the monitored custom resources to have `successful` or `failed` status. The default value is `300`. + +The `INTEGRATION_TESTS_RESOURCE` parameter specifies the characteristics of the resource which the status of +integration tests execution is stored in. This parameter value should consist of **four** parts separated by space: +resource group, version, plural and its name. For example, if you want to read the integration tests status from `Deployment` +named `consul-integration-tests-runner`, the value should look like `apps v1 deployments consul-integration-tests-runner`. +This parameter should be specified only if you want the result of the integration tests to get inside the final cluster +status. + +The `INTEGRATION_TESTS_CONDITION_REASON` parameter specifies the name of the condition reason which meets the condition +with the result of the integration tests in the `INTEGRATION_TESTS_RESOURCE` resource. The default value is `IntegrationTestsExecutionStatus`. +This parameter is meaningless without `INTEGRATION_TESTS_RESOURCE` parameter. + +The `INTEGRATION_TESTS_SUCCESSFUL_CONDITION_TYPE` parameter specifies the condition type which corresponds to the successful +result of the integration tests in the status condition of the `INTEGRATION_TESTS_RESOURCE` resource. The default value +is `Ready`. +This parameter is meaningless without `INTEGRATION_TESTS_RESOURCE` parameter. + +The `INTEGRATION_TESTS_TIMEOUT` parameter specifies the timeout in seconds that the `Deployment Status Provisioner` waits for +successful or failed status condition in the `INTEGRATION_TESTS_RESOURCE` resource. The default value is `300`. 
+This parameter is meaningless without `INTEGRATION_TESTS_RESOURCE` parameter. + +The `TREAT_STATUS_AS_FIELD` parameter specifies whether resource status should be treated as field. It is necessary when initially `RESOURCE_TO_SET_STATUS` does not have `Status` sub-resource. In that case status is set as a field to chosen resource. For example, it may be applicable for some of custom resources. The default value +is `False`. + +# Example + +`Deployment Status Provisioner` job with only required environment variables looks as follows: + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: my-status-provisioner + labels: + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + template: + metadata: + name: my-status-provisioner + labels: + component: status-provisioner + spec: + restartPolicy: Never + serviceAccountName: my-status-provisioner + containers: + - name: status-provisioner + image: artifactorycn.netcracker.com:17008/product/prod.platform.streaming_deployment-status-provisioner:master_latest + imagePullPolicy: "Always" + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: MONITORED_RESOURCES + value: "Deployment backup-daemon, StatefulSet server, Job server-acl-init" + - name: RESOURCE_TO_SET_STATUS + value: "batch v1 jobs my-status-provisioner" + resources: + requests: + memory: "50Mi" + cpu: "50m" + limits: + memory: "50Mi" + cpu: "50m" +``` +**NOTE:** You cannot use artifactory Docker images in your Helm templates due to external environments, it is necessary to use `DeploymentDescriptor` values and to add deployment status provisioner to dependencies. 
For example: + +```yaml + - type: find-latest-deployment-descriptor + repo: PROD.Platform.Streaming/deployment-status-provisioner + location: 0.0.16 + docker-image-id: timestamp + deploy-param: deploymentStatusProvisioner +``` + +You should also create `Service Account`, `Role Binding` and `Role` with permissions that allow `Deployment Status Provisioner` +to work with your monitored resources. + +`Deployment Status Provisioner` role should allow to `get` statuses for all resources that are specified in the `MONITORED_RESOURCES` +parameter. In addition, the role should give permissions to `get` and `patch` status for the resource from `RESOURCE_TO_SET_STATUS` +parameter. So, according to the configured `Deployment Status Provisioner` job, the role should look like this: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: my-status-provisioner +rules: + - apiGroups: + - apps + resources: + - deployments/status + - statefulsets/status + verbs: + - get + - apiGroups: + - batch + resources: + - jobs/status + verbs: + - get + - patch +``` + +And the following `deployment-configuration.json` can be used: +```yaml +{ + "statusPolling":{ + "resourceType": "job.batch", + "resourceName": "my-status-provisioner", + "statusPath": "$.status.conditions[?(@.type=='Successful')]", + "statusPathFail": "$.status.conditions[?(@.type=='Failed')]", + "timeout": "${ CUSTOM_TIMEOUT_MIN ? CUSTOM_TIMEOUT_MIN : '10' }" + } +} +``` + +The example of status subresource: +```yaml +status: + conditions: + - lastTransitionTime: 2023-10-31 07:45:28.487195606 +0000 UTC m=+74.412108827 + message: The deployment readiness status check is successful + reason: ServiceReadinessStatus + status: 'True' + type: Successful +``` + +A complete example can be found in [Consul Service Templates](https://git.netcracker.com/PROD.Platform.Streaming/consul-service/-/tree/master/charts/helm/consul-service/templates/status-provisioner). 
FROM python:3.10.14-alpine3.20

ENV STATUS_PROVISIONER_HOME=/opt/provisioner \
    PYTHONUNBUFFERED=1


COPY docker/requirements.txt ${STATUS_PROVISIONER_HOME}/requirements.txt
COPY docker/docker-entrypoint.sh /
COPY docker/*.py ${STATUS_PROVISIONER_HOME}/

RUN set -x && apk add --upgrade --no-cache bash python3 apk-tools wget sed

# Install kubectl - it is required for vault-service-status-provisioner-cleanup job
ARG KUBECTL_VERSION="v1.30.1"
# TARGETARCH is set by BuildKit for each target platform (amd64, arm64, ...).
# The amd64 fallback keeps builders that do not provide it working as before.
ARG TARGETARCH
# NOTE(review): --no-check-certificate disables TLS verification for a binary
# that is installed into the image; confirm whether it is still required.
RUN set -x \
    && wget \
        --no-check-certificate \
        -nv \
        -O "/usr/local/bin/kubectl" \
        "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/${TARGETARCH:-amd64}/kubectl" \
    && chmod +x "/usr/local/bin/kubectl"

# Upgrade all tools to avoid vulnerabilities
RUN set -x && apk upgrade --no-cache --available

#Add unprivileged user
RUN set -x \
    && addgroup -S -g 1000 provisioner \
    && adduser -s /bin/bash -S -G provisioner -u 1000 provisioner \
    && addgroup provisioner root

RUN set -x \
    && python3 -m ensurepip \
    && rm -r /usr/lib/python*/ensurepip \
    && pip3 install --upgrade pip setuptools==70.0.0 \
    && pip3 install -r ${STATUS_PROVISIONER_HOME}/requirements.txt \
    && rm -rf /var/cache/apk/*

RUN set -x \
    && for path in \
         /docker-entrypoint.sh \
    ; do \
        chmod +x "$path"; \
        chgrp 0 "$path"; \
    done

WORKDIR ${STATUS_PROVISIONER_HOME}

USER 1000:0
ENTRYPOINT ["/docker-entrypoint.sh"]
class KubernetesLibrary(object):
    """Wrapper over the Kubernetes API clients used by the status provisioner.

    Workload readiness is read through the apps/v1 and batch/v1 clients.
    Status conditions are read and written through the custom objects client,
    which addresses a resource by group, version, plural and name.
    """

    # Maps a lower-cased resource kind to the method that checks its readiness.
    _READINESS_CHECKS = {
        'daemonset': 'is_daemon_set_ready',
        'deployment': 'is_deployment_ready',
        'job': 'is_job_succeeded',
        'statefulset': 'is_stateful_set_ready',
    }

    def __init__(self,
                 namespace: str,
                 resource_to_set_status=None,
                 config_file=None,
                 context=None,
                 persist_config=True):
        """Create API clients for the given namespace.

        :param namespace: namespace that all read and write calls are made in.
        :param resource_to_set_status: optional 'group version plural name'
            description of the resource that receives the status condition.
        :param config_file: kubeconfig path passed to the client factory.
        :param context: kubeconfig context passed to the client factory.
        :param persist_config: passed to the client factory unchanged.
        """
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        self.k8s_api_client = get_kubernetes_api_client(config_file=config_file,
                                                        context=context,
                                                        persist_config=persist_config)
        self.k8s_apps_v1_client = kubernetes.client.AppsV1Api(self.k8s_api_client)
        self.k8s_batch_v1_client = kubernetes.client.BatchV1Api(self.k8s_api_client)
        self.custom_objects_api = kubernetes.client.CustomObjectsApi(self.k8s_api_client)
        self.namespace = namespace
        # Always define the attribute so that a missing target is reported with
        # a clear message instead of an AttributeError on first use.
        self.status_resource = CustomResource(resource_to_set_status) if resource_to_set_status else None

    def _require_status_resource(self):
        """Return the resource to set status on, or raise if none was configured."""
        if self.status_resource is None:
            raise Exception('The resource to set status is not specified.')
        return self.status_resource

    def delete_job(self, name):
        """Delete the job with the given name using background propagation."""
        self.k8s_batch_v1_client.delete_namespaced_job(name, self.namespace, propagation_policy='Background')

    def is_resource_ready(self, resource_type: str, name: str) -> bool:
        """Check readiness of a resource of the given lower-cased kind.

        :raises Exception: if the kind is not one of the supported ones.
        """
        check_name = self._READINESS_CHECKS.get(resource_type)
        if check_name is None:
            raise Exception(f'The type [{resource_type}] is not supported yet.')
        return getattr(self, check_name)(name)

    def is_daemon_set_ready(self, name: str) -> bool:
        """Return True when all desired daemon set pods are ready and updated."""
        status = self.k8s_apps_v1_client.read_namespaced_daemon_set_status(name, self.namespace).status
        return (status.desired_number_scheduled == status.number_ready
                and status.desired_number_scheduled == status.updated_number_scheduled)

    def is_deployment_ready(self, name: str) -> bool:
        """Return True when all deployment replicas are ready and updated."""
        status = self.k8s_apps_v1_client.read_namespaced_deployment_status(name, self.namespace).status
        return status.replicas == status.ready_replicas and status.replicas == status.updated_replicas

    def is_job_succeeded(self, name: str) -> bool:
        """Return True when the job has reached its required number of completions.

        The required number is taken from ``spec.completions`` and defaults to 1
        when it is not set, so single-completion jobs behave as before.
        """
        job = self.k8s_batch_v1_client.read_namespaced_job_status(name, self.namespace)
        spec = getattr(job, 'spec', None)
        required = (getattr(spec, 'completions', None) or 1) if spec is not None else 1
        # status.succeeded is None until the first pod completes.
        return (job.status.succeeded or 0) >= required

    def is_stateful_set_ready(self, name: str) -> bool:
        """Return True when all stateful set replicas are ready and updated."""
        status = self.k8s_apps_v1_client.read_namespaced_stateful_set_status(name, self.namespace).status
        return status.replicas == status.ready_replicas and status.replicas == status.updated_replicas

    def get_custom_resource(self, resource: CustomResource):
        """Read the whole object described by ``resource`` from the namespace."""
        return self.custom_objects_api.get_namespaced_custom_object(resource.group, resource.version, self.namespace,
                                                                    resource.plural, resource.name)

    def get_custom_resource_status_condition(self, resource: CustomResource, condition_reason: str) -> dict:
        """Return the first status condition whose ``reason`` matches.

        :return: the matching condition, or an empty dict when the resource has
            no status, no conditions, or no condition with that reason.
        """
        resource_status = self.custom_objects_api.get_namespaced_custom_object_status(resource.group, resource.version,
                                                                                      self.namespace, resource.plural,
                                                                                      resource.name)
        # A freshly created resource may not have a status section yet.
        conditions = (resource_status.get('status') or {}).get('conditions') or []
        for condition in conditions:
            if condition.get('reason') == condition_reason:
                return condition
        return {}

    def update_custom_resource_status_condition(self, new_condition: dict):
        """Insert or replace ``new_condition`` via the status sub-resource.

        An existing condition is replaced when its ``reason`` equals the new
        reason, or when it has no ``reason`` and its ``message`` equals the new
        reason (the shape written by ``update_custom_resource_status_as_field``).
        Otherwise the new condition is appended.
        """
        target = self._require_status_resource()
        resource_status = self.custom_objects_api.get_namespaced_custom_object_status(target.group,
                                                                                      target.version,
                                                                                      self.namespace,
                                                                                      target.plural,
                                                                                      target.name)
        status = resource_status.get('status')
        if not status:
            status = {}
            resource_status['status'] = status
        conditions = status.get('conditions')
        if not conditions:
            conditions = []
        is_condition_found = False
        for i, condition in enumerate(conditions):
            if (condition.get('reason') == new_condition['reason']
                    or condition.get('reason') is None and condition.get('message') == new_condition['reason']):
                conditions[i] = new_condition
                is_condition_found = True
                break
        if not is_condition_found:
            conditions.append(new_condition)

        resource_status['status']['conditions'] = conditions
        self.custom_objects_api.patch_namespaced_custom_object_status(target.group,
                                                                      target.version,
                                                                      self.namespace,
                                                                      target.plural,
                                                                      target.name,
                                                                      resource_status)

    def update_custom_resource_status_as_field(self, new_condition: dict):
        """Insert or replace ``new_condition`` by patching the object itself.

        The condition is stored as a ``V1ComponentCondition`` whose ``message``
        carries the condition reason. An existing condition is matched by that
        ``message``. The ``status`` field is then replaced with a
        ``V1ComponentStatus`` holding the resulting conditions.
        """
        target = self._require_status_resource()
        custom_resource = self.custom_objects_api.get_namespaced_custom_object(target.group,
                                                                               target.version,
                                                                               self.namespace,
                                                                               target.plural,
                                                                               target.name)
        status = custom_resource.get('status', None)
        if status:
            conditions = status.get('conditions', [])
        else:
            conditions = []
        is_condition_found = False
        new_condition = V1ComponentCondition(
            type=new_condition['type'],
            status=new_condition['status'],
            message=new_condition['reason']
        )
        for i, condition in enumerate(conditions):
            if condition.get('message') == new_condition.message:
                conditions[i] = new_condition
                is_condition_found = True
                break
        if not is_condition_found:
            conditions.append(new_condition)

        status = V1ComponentStatus(conditions=conditions)
        custom_resource['status'] = status
        self.custom_objects_api.patch_namespaced_custom_object(target.group,
                                                               target.version,
                                                               self.namespace,
                                                               target.plural,
                                                               target.name,
                                                               custom_resource)
def get_resources_statuses(resources: str, kubernetes_library: KubernetesLibrary) -> []:
    """Wait for every monitored resource to become ready and collect the results.

    ``resources`` is a comma-separated list of ``<kind> <name>`` descriptions.
    Each resource is polled every 5 seconds until it is ready or until
    ``POD_READINESS_TIMEOUT`` seconds have passed for that resource.

    :return: one entry per resource: an empty string when it became ready,
        otherwise a "component is not ready" message.
    :raises Exception: if a description does not consist of exactly two parts.
    """
    timeout = int(os.getenv('POD_READINESS_TIMEOUT', DEFAULT_TIMEOUT))
    statuses = []
    if not resources:
        return statuses

    for description in resources.split(','):
        description = description.strip()
        print(f'Processing [{description}] resource')
        parts = description.split()
        if len(parts) != 2:
            raise Exception(f'Resource description must contain 2 parts: type and name. '
                            f'But [{description}] is received.')
        kind, resource_name = parts
        resource_type = kind.lower()

        # Each resource gets its own full timeout window.
        deadline = time.time() + timeout
        is_ready = False
        while deadline > time.time():
            if kubernetes_library.is_resource_ready(resource_type, resource_name):
                is_ready = True
                break
            time.sleep(5)
        statuses.append('' if is_ready else f'[{resource_name}] component is not ready.')
    return statuses
def get_integration_tests_status(integration_tests_resource: str, kubernetes_library: KubernetesLibrary) -> str:
    """Wait for the integration tests result and return a failure message, if any.

    The condition with reason ``INTEGRATION_TESTS_CONDITION_REASON`` is polled
    every 5 seconds on the given resource until its type is something other
    than 'In Progress' or ``INTEGRATION_TESTS_TIMEOUT`` seconds have passed.

    :param integration_tests_resource: 'group version plural name' description
        of the resource that stores the integration tests status.
    :return: an empty string when the condition type equals
        ``INTEGRATION_TESTS_SUCCESSFUL_CONDITION_TYPE``; otherwise a non-empty
        message describing the failure or the timeout.
    """
    integration_tests_condition_reason = os.getenv('INTEGRATION_TESTS_CONDITION_REASON',
                                                   ConditionReason.INTEGRATION_TESTS_DEFAULT)
    integration_tests_successful_condition_type = os.getenv('INTEGRATION_TESTS_SUCCESSFUL_CONDITION_TYPE',
                                                            ConditionType.READY)
    integration_tests_timeout = int(os.getenv('INTEGRATION_TESTS_TIMEOUT', DEFAULT_TIMEOUT))

    resource = CustomResource(integration_tests_resource)
    print(f'Processing integration tests status from [{resource.name}] resource')
    start_time = time.time()
    while start_time + integration_tests_timeout > time.time():
        condition = kubernetes_library.get_custom_resource_status_condition(resource,
                                                                            integration_tests_condition_reason)
        if condition and condition.get('type') != ConditionType.IN_PROGRESS:
            if condition.get('type') == integration_tests_successful_condition_type:
                return ''
            # The caller treats an empty or missing message as success, so a
            # failed condition must always produce a non-empty message.
            return (condition.get('message')
                    or f'Integration tests finished with [{condition.get("type")}] condition type.')
        time.sleep(5)
    return f'Integration tests have not completed in {integration_tests_timeout} seconds.'
def is_env_flag_enabled(name: str, default: bool = False) -> bool:
    """Interpret the environment variable ``name`` as a boolean flag.

    ``os.getenv`` always returns a string, so the value 'False' would be truthy
    if used directly. An empty value and the case-insensitive values '0',
    'false', 'no', 'n' and 'off' are treated as disabled; any other value
    enables the flag. ``default`` is returned when the variable is not set.
    """
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() not in ('', '0', 'false', 'no', 'n', 'off')


if __name__ == '__main__':
    monitored_resources = os.getenv('MONITORED_RESOURCES')
    monitored_custom_resources = os.getenv('MONITORED_CUSTOM_RESOURCES')
    namespace = os.getenv('NAMESPACE')
    resource_to_set_status = os.getenv('RESOURCE_TO_SET_STATUS')
    treat_status_as_field = is_env_flag_enabled('TREAT_STATUS_AS_FIELD')
    if (monitored_resources or monitored_custom_resources) and namespace and resource_to_set_status:
        condition_reason = os.getenv('CONDITION_REASON', ConditionReason.DEFAULT)
        successful_condition_type = os.getenv('SUCCESSFUL_CONDITION_TYPE', ConditionType.SUCCESSFUL)
        failed_condition_type = os.getenv('FAILED_CONDITION_TYPE', ConditionType.FAILED)
        kubernetes_library = KubernetesLibrary(namespace, resource_to_set_status)
        condition_library = Condition(condition_reason, successful_condition_type)
        successful_status_message = 'All components are in ready status.'

        # The status is written either as a plain field of the resource or
        # through its status sub-resource; pick the writer once.
        if treat_status_as_field:
            update_status = kubernetes_library.update_custom_resource_status_as_field
        else:
            update_status = kubernetes_library.update_custom_resource_status_condition

        # Update status condition with 'In Progress' state
        update_status(condition_library.get_condition(ConditionType.IN_PROGRESS,
                                                      'Computing of cluster state is in progress'))

        # Calculates statuses of resources specified in MONITORED_RESOURCES parameter.
        received_statuses = get_resources_statuses(monitored_resources, kubernetes_library)

        # Calculates statuses of custom resources specified in MONITORED_CUSTOM_RESOURCES parameter.
        received_statuses.extend(get_custom_resources_statuses(monitored_custom_resources, kubernetes_library))

        # Receive the results of running integration tests
        integration_tests_resource = os.getenv('INTEGRATION_TESTS_RESOURCE')
        if integration_tests_resource:
            successful_status_message = f'{successful_status_message} Integration tests are successfully completed.'
            integration_tests_status = get_integration_tests_status(integration_tests_resource, kubernetes_library)
            received_statuses.append(integration_tests_status)

        # Update status condition with final state.
        # Empty entries mean "ready"; only failure messages remain after filtering.
        received_statuses = list(filter(None, received_statuses))
        print(f'Failed components statuses are {received_statuses}')
        if received_statuses:
            condition_type = failed_condition_type
            condition_message = ' '.join(received_statuses)
        else:
            condition_type = successful_condition_type
            condition_message = successful_status_message
        update_status(condition_library.get_condition(condition_type, condition_message))
    else:
        # Without these parameters no status can be computed or written, so say
        # so explicitly instead of finishing silently.
        print('Nothing to process: NAMESPACE, RESOURCE_TO_SET_STATUS and at least one of '
              'MONITORED_RESOURCES or MONITORED_CUSTOM_RESOURCES must be specified.')