From a66ebb8a1a76355b6f3061bb08e77eec4cee044b Mon Sep 17 00:00:00 2001 From: "shaowei.wayne" Date: Tue, 21 Nov 2023 15:47:52 +0800 Subject: [PATCH] add detailed comments for katalyst enhancements and other consts --- pkg/consts/overcommit.go | 1 + pkg/consts/qos.go | 49 ++++++++++++++++++++++++++++++++++++++++ pkg/consts/spd.go | 8 +++---- pkg/consts/vpa.go | 1 + 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/pkg/consts/overcommit.go b/pkg/consts/overcommit.go index f7f7ccd..db90a05 100644 --- a/pkg/consts/overcommit.go +++ b/pkg/consts/overcommit.go @@ -27,6 +27,7 @@ const ( NodeAnnotationOriginalAllocatableMemoryKey = "katalyst.kubewharf.io/original_allocatable_memory" ) +// const variables for matching up with node labels about overcommit const ( NodeOvercommitSelectorKey = "katalyst.kubewharf.io/overcommit_node_pool" diff --git a/pkg/consts/qos.go b/pkg/consts/qos.go index 5bf3e85..bd92b0a 100644 --- a/pkg/consts/qos.go +++ b/pkg/consts/qos.go @@ -30,11 +30,30 @@ const ( const ( PodAnnotationMemoryEnhancementKey = "katalyst.kubewharf.io/memory_enhancement" + // PodAnnotationMemoryEnhancementRssOverUseThreshold provides a mechanism to enable + // the ability of overcommit for memory, and we will rely on this enhancement to ensure + // memory protection if rss usage exceeds requests (based on this given ratio) PodAnnotationMemoryEnhancementRssOverUseThreshold = "rss_overuse_threshold" + // PodAnnotationMemoryEnhancementNumaBinding provides a mechanism to enable numa-binding + // for workload to provide more ultimate running performances. + // + // With PodAnnotationMemoryEnhancementNumaBinding but without PodAnnotationMemoryEnhancementNumaExclusive, + // we have several constraints below: + // 1. different workloads may still share the same numa + // - these workloads may still have contentions on memory bandwidth + // 2. 
the request for pod can be settled in a single numa node + // - this is to avoid complicated cross numa memory capacity/bandwidth control + // + // todo: this enhancement is only supported for dedicated-cores now, + // the community is to support shared-cores in the near future. PodAnnotationMemoryEnhancementNumaBinding = "numa_binding" PodAnnotationMemoryEnhancementNumaBindingEnable = "true" + // PodAnnotationMemoryEnhancementNumaExclusive provides a mechanism to enable numa-exclusive + // for A SINGLE Pod to avoid contention on memory bandwidth and so on. + // + // - this enhancement is only supported for dedicated-cores, for now and foreseeable future PodAnnotationMemoryEnhancementNumaExclusive = "numa_exclusive" PodAnnotationMemoryEnhancementNumaExclusiveEnable = "true" ) @@ -43,8 +62,23 @@ const ( const ( PodAnnotationCPUEnhancementKey = "katalyst.kubewharf.io/cpu_enhancement" + // PodAnnotationCPUEnhancementCPUSet provides a mechanism to separate cpuset into + // several orthogonal pools to avoid cpu contentions for different types of workloads, + // e.g. spark batch, flink streaming, web service may fall into three pools. + // and, each individual pod should be put into only one pool. + // + // - this enhancement is only supported for shared-cores, for now and foreseeable future + // - all pods will be settled in `default` pool if not specified PodAnnotationCPUEnhancementCPUSet = "cpuset_pool" + // PodAnnotationCPUEnhancementSuppressionToleranceRate provides a mechanism to ensure + // the quality for reclaimed resources. since reclaimed resources will always change + // dynamically according to running states of non-reclaimed services, it may reach + // a point where the resource contention is still tolerable for non-reclaimed services, + // but the reclaimed services run too slowly and would rather be killed and rescheduled. + // in this case, the workload can use this enhancement to trigger eviction. 
+ // + // - this enhancement is only supported for shared-cores, for now and foreseeable future PodAnnotationCPUEnhancementSuppressionToleranceRate = "suppression_tolerance_rate" ) @@ -52,12 +86,27 @@ const ( const ( PodAnnotationNetworkEnhancementKey = "katalyst.kubewharf.io/network_enhancement" + // PodAnnotationNetworkEnhancementNamespaceType provides a mechanism to select nic in different namespaces + // - PodAnnotationNetworkEnhancementNamespaceTypeHost + // - only select nic device in host namespace + // - fail admission if not possible + // - PodAnnotationNetworkEnhancementNamespaceTypeHostPrefer + // - prefer to select nic device in host namespace + // - also accept nic device in non-host namespace if not possible + // - PodAnnotationNetworkEnhancementNamespaceTypeNotHost + // - only select nic device in non-host namespace + // - fail admission if not possible + // - PodAnnotationNetworkEnhancementNamespaceTypeNotHostPrefer + // - prefer to select nic device in non-host namespace + // - also accept nic device in host namespace if not possible PodAnnotationNetworkEnhancementNamespaceType = "namespace_type" PodAnnotationNetworkEnhancementNamespaceTypeHost = "host_ns" PodAnnotationNetworkEnhancementNamespaceTypeHostPrefer = "host_ns_preferred" PodAnnotationNetworkEnhancementNamespaceTypeNotHost = "anti_host_ns" PodAnnotationNetworkEnhancementNamespaceTypeNotHostPrefer = "anti_host_ns_preferred" + // PodAnnotationNetworkEnhancementAffinityRestricted is set to true to indicate + // we must ensure the numa affinity for nic devices, and we should fail admission if not possible PodAnnotationNetworkEnhancementAffinityRestricted = "topology_affinity_restricted" PodAnnotationNetworkEnhancementAffinityRestrictedTrue = "true" ) diff --git a/pkg/consts/spd.go b/pkg/consts/spd.go index d6ba5d3..8a142b9 100644 --- a/pkg/consts/spd.go +++ b/pkg/consts/spd.go @@ -17,17 +17,17 @@ limitations under the License. 
package consts const ( + // PodAnnotationSPDNameKey is used to maintain corresponding spdName in pod + // annotation so that metaServer can target its spd more conveniently. PodAnnotationSPDNameKey = "spd.katalyst.kubewharf.io/name" ) // const variables for workload annotations about spd. const ( - // WorkloadAnnotationSPDEnableKey disables for workload means that we should not - // maintain spd CR and much less to calculate service profiling automatically + // WorkloadAnnotationSPDEnableKey provides a whitelist mechanism for enabling spd; + // if it's set to false, we should not maintain spd CR or calculate service profiling automatically. WorkloadAnnotationSPDEnableKey = "spd.katalyst.kubewharf.io/enable" WorkloadAnnotationSPDEnabled = "true" - - WorkloadAnnotationSPDNameKey = "spd.katalyst.kubewharf.io/name" ) // const variables for spd annotations. diff --git a/pkg/consts/vpa.go b/pkg/consts/vpa.go index b198544..2316a64 100644 --- a/pkg/consts/vpa.go +++ b/pkg/consts/vpa.go @@ -31,6 +31,7 @@ const ( WorkloadAnnotationVPASelectorKey = "vpa.katalyst.kubewharf.io/selector" ) +// const variables for workload annotations about vpaRec. const ( VPAAnnotationVPARecNameKey = "vpa.katalyst.kubewharf.io/recName"