From 9355b715e4f3523a9cfb3d231217aabc154611d3 Mon Sep 17 00:00:00 2001 From: terassyi Date: Mon, 3 Jun 2024 11:01:57 +0900 Subject: [PATCH] refactor metrics Signed-off-by: terassyi --- docs/usage.md | 26 ++ .../base/webhook/admission_webhook_patch.yaml | 12 +- sart/src/proto/sart.v1.rs | 1 + sartcni/Cargo.lock | 361 +----------------- sartd/Cargo.lock | 1 + sartd/src/bgp/Cargo.lock | 1 + sartd/src/cmd/Cargo.lock | 1 + sartd/src/fib/Cargo.lock | 1 + sartd/src/kubernetes/Cargo.lock | 1 + sartd/src/kubernetes/src/agent.rs | 2 + sartd/src/kubernetes/src/agent/cni/server.rs | 61 ++- sartd/src/kubernetes/src/agent/context.rs | 159 ++++++++ sartd/src/kubernetes/src/agent/metrics.rs | 171 +++++++++ .../src/agent/reconciler/address_block.rs | 23 +- .../src/agent/reconciler/bgp_advertisement.rs | 16 +- .../src/agent/reconciler/bgp_peer.rs | 47 ++- .../src/agent/reconciler/bgp_peer_watcher.rs | 36 +- .../src/agent/reconciler/node_bgp.rs | 80 +++- sartd/src/kubernetes/src/agent/server.rs | 34 +- sartd/src/kubernetes/src/context.rs | 28 +- sartd/src/kubernetes/src/controller.rs | 2 + .../src/kubernetes/src/controller/context.rs | 198 ++++++++++ .../src/kubernetes/src/controller/metrics.rs | 181 +++++++++ .../controller/reconciler/address_block.rs | 52 ++- .../src/controller/reconciler/address_pool.rs | 40 +- .../reconciler/bgp_advertisement.rs | 83 +++- .../controller/reconciler/block_request.rs | 28 +- .../src/controller/reconciler/cluster_bgp.rs | 15 +- .../reconciler/endpointslice_watcher.rs | 14 +- .../src/controller/reconciler/node_watcher.rs | 16 +- .../controller/reconciler/service_watcher.rs | 21 +- sartd/src/kubernetes/src/controller/server.rs | 50 ++- .../kubernetes/src/crd/bgp_advertisement.rs | 10 + sartd/src/kubernetes/src/crd/bgp_peer.rs | 15 + sartd/src/kubernetes/src/crd/node_bgp.rs | 11 + sartd/src/kubernetes/src/fixture.rs | 38 +- sartd/src/kubernetes/src/lib.rs | 1 + sartd/src/kubernetes/src/metrics.rs | 89 +++++ .../tests/agent_address_block_test.rs | 17 +- .../tests/agent_bgp_advertisement_test.rs | 5 +- .../kubernetes/tests/agent_bgp_peer_test.rs | 5 +- .../kubernetes/tests/agent_cni_server_test.rs | 2 +- .../kubernetes/tests/agent_node_bgp_test.rs | 7 +- .../tests/controller_address_block_test.rs | 13 +- .../tests/controller_address_pool_pod_test.rs | 18 +- .../controller_address_pool_service_test.rs | 17 +- .../controller_bgp_advertisement_test.rs | 7 +- .../tests/controller_block_request_test.rs | 16 +- .../tests/controller_cluster_bgp_test.rs | 9 +- .../controller_endpointslice_watcher_test.rs | 12 +- .../tests/controller_service_watcher_test.rs | 6 +- .../src/kubernetes/tests/node_watcher_test.rs | 9 +- sartd/src/proto/src/sart.v1.rs | 1 - sartd/src/trace/Cargo.lock | 1 + sartd/src/trace/Cargo.toml | 1 + sartd/src/trace/src/telemetry.rs | 12 +- 56 files changed, 1539 insertions(+), 545 deletions(-) create mode 100644 sartd/src/kubernetes/src/agent/context.rs create mode 100644 sartd/src/kubernetes/src/agent/metrics.rs create mode 100644 sartd/src/kubernetes/src/controller/context.rs create mode 100644 sartd/src/kubernetes/src/controller/metrics.rs create mode 100644 sartd/src/kubernetes/src/metrics.rs diff --git a/docs/usage.md b/docs/usage.md index 6a5a181..9190351 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,3 +7,29 @@ Please refer to following links for details of each feature. 
- [bgp](./bgp.md) - [fib](./fib.md) - [kubernetes](./kubernetes.md) + +## Metrics + +### Controller + +|Name|Type|Description| +|---|---|---| +|sart_controller_reconciliation_total|Counter|Total count of reconciliations| +|sart_controller_reconciliation_errors_total|Counter|Total count of reconciliation errors| +|sart_controller_max_blocks|Gauge|The maximum number of allocatable address blocks| +|sart_controller_allocated_blocks|Gauge|The number of allocated address blocks| +|sart_controller_bgp_advertisements|Gauge|The number of BGP Advertisements| +|sart_controller_bgp_advertisement_status|Gauge|BGP Advertisement status| +|sart_controller_bgp_advertisement_backoff_count|Counter|Total backoff count of BGP Advertisements| + +### Agent + +|Name|Type|Description| +|---|---|---| +|sart_agent_reconciliation_total|Counter|Total count of reconciliations| +|sart_agent_reconciliation_errors_total|Counter|Total count of reconciliation errors| +|sart_agent_cni_call_total|Counter|Total count of CNI calls| +|sart_agent_cni_call_errors_total|Counter|Total count of CNI call errors| +|sart_agent_bgp_peer_status|Gauge|BGP peer status| +|sart_agent_node_bgp_status|Gauge|Node BGP status| +|sart_agent_node_bgp_backoff_count_total|Counter|Total count of NodeBGP backoffs| diff --git a/manifests/base/webhook/admission_webhook_patch.yaml b/manifests/base/webhook/admission_webhook_patch.yaml index 9875998..662ecb7 100644 --- a/manifests/base/webhook/admission_webhook_patch.yaml +++ b/manifests/base/webhook/admission_webhook_patch.yaml @@ -5,13 +5,13 @@ metadata: webhooks: - name: vbgppeer.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: vbgpadvertisement.kb.io clientConfig: - caBundle:
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: vaddresspool.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" --- apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration @@ -20,10 +20,10 @@ metadata: webhooks: - name: 
mbgppeer.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: maddressblock.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: mservice.kb.io clientConfig: - caBundle: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQU1JSk54ZXhCV2pua2kvaUNoUzNSZ042dnV4N01Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdOakEzTVRBMApNRFV3V2hnUE1qRXlOREExTVRReE1EUXdOVEJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWFFFZHJIZEJYZXdzZ0ZyYjAKalhlRmNOemxoKy9CNTk1NlprZFZiWTAyaXkxSG9DazRPNVQ1SENLbEFwSWg5Z2pzakdKWGd5dW5HSStscktWbQpjdFdUbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJRlRxTUJpSDBRYjY3K2xqTWtSNTlJSHhvVmVaQ3dFZDVTSGlISE5sRHlSSQpBaUVBM3NXSENHSnEzOFcvejVHMVlhaFFYYWV6RVlPSm5yVWFXLzg0SVVHaExZQT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" diff --git a/sart/src/proto/sart.v1.rs b/sart/src/proto/sart.v1.rs index 03510c9..498500d 100644 --- a/sart/src/proto/sart.v1.rs +++ b/sart/src/proto/sart.v1.rs @@ -1,3 +1,4 @@ +// This file is @generated by prost-build. 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct HealthRequest {} diff --git a/sartcni/Cargo.lock b/sartcni/Cargo.lock index 2f8015c..9a4bd57 100644 --- a/sartcni/Cargo.lock +++ b/sartcni/Cargo.lock @@ -26,21 +26,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anyhow" version = "1.0.79" @@ -179,18 +164,6 @@ name = "built" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38d17f4d6e4dc36d1a02fbedc2753a096848e7c1b0772f7654eab8e2c927dd53" -dependencies = [ - "cargo-lock", - "chrono", - "git2", - "semver", -] - -[[package]] -name = "bumpalo" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytes" @@ -198,26 +171,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" -[[package]] -name = "cargo-lock" -version = "9.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e11c675378efb449ed3ce8de78d75d0d80542fc98487c26aba28eb3b82feac72" -dependencies = [ - "petgraph", - "semver", - "serde", - "toml", - "url", -] - [[package]] name = "cc" version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ - "jobserver", "libc", ] @@ -227,24 +186,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-targets 0.48.5", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - [[package]] name = "either" version = "1.9.0" @@ -285,15 +226,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - [[package]] name = "futures" version = "0.3.30" @@ -406,19 +338,6 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" -[[package]] -name = "git2" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" -dependencies = [ - "bitflags 2.4.1", - "libc", - "libgit2-sys", - "log", - "url", -] - [[package]] name = "glob" version = "0.3.1" @@ -427,9 +346,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.23" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b553656127a00601c8ae5590fcfdc118e4083a7924b6cf4ffc1ea4b99dc429d7" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -547,39 +466,6 @@ dependencies = [ "tokio-io-timeout", ] -[[package]] -name = "iana-time-zone" -version = "0.1.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "indexmap" version = "1.9.3" @@ -621,54 +507,12 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "jobserver" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "libc" version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" -[[package]] -name = "libgit2-sys" -version = "0.16.1+1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" -dependencies = [ - "cc", - "libc", - "libz-sys", - "pkg-config", -] - -[[package]] -name = "libz-sys" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linux-raw-sys" version = "0.4.12" @@ -725,15 +569,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "num-traits" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" -dependencies = [ - "autocfg", -] - [[package]] name = "num_cpus" version = "1.16.0" @@ -807,12 +642,6 @@ version = "0.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkg-config" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -977,8 +806,9 @@ checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" [[package]] name = "rscni" -version = "0.0.3" -source = "git+https://github.com/terassyi/rscni?branch=fix-error-result#e51f1a49d3047f26a45a40f7e56fcbbaa106b2a4" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87b1b458ac1db7570f6d084f19291a38e240928abd34565830889fef0b72d7d5" dependencies = [ "serde", "serde_json", @@ -987,9 +817,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.18.2" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199" +checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" dependencies = [ "futures", "futures-timer", @@ -999,9 +829,9 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.18.2" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" +checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" dependencies = [ "cfg-if", "glob", @@ -1082,9 +912,6 @@ name = "semver" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" -dependencies = [ - "serde", -] [[package]] name = "serde" @@ -1117,15 +944,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_spanned" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.9" @@ -1195,21 +1013,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.35.1" @@ -1273,45 +1076,11 @@ dependencies = [ "tracing", ] -[[package]] -name = "toml" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", -] - -[[package]] -name = "toml_datetime" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" -dependencies = [ - "serde", -] - -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap 2.1.0", - "serde", - "serde_spanned", - 
"toml_datetime", - "winnow", -] - [[package]] name = "tonic" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d560933a0de61cf715926b9cac824d4c883c2c43142f787595e48280c40a1d0e" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" dependencies = [ "async-stream", "async-trait", @@ -1336,9 +1105,9 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d021fc044c18582b9a2408cd0dd05b1596e3ecdb5c4df822bb0183545683889" +checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" dependencies = [ "prettyplease", "proc-macro2", @@ -1416,44 +1185,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "unicode-bidi" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "url" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "want" version = "0.3.1" @@ -1469,60 +1206,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasm-bindgen" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.89" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" - [[package]] name = "which" version = "4.4.2" @@ -1535,15 +1218,6 @@ dependencies = [ "rustix", ] -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.0", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -1675,12 +1349,3 @@ name = "windows_x86_64_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" - -[[package]] -name = "winnow" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" -dependencies = [ - "memchr", -] diff --git a/sartd/Cargo.lock b/sartd/Cargo.lock index 2f0deba..0b5f23b 100644 --- a/sartd/Cargo.lock +++ b/sartd/Cargo.lock @@ -2415,6 +2415,7 @@ dependencies = [ "kube", "opentelemetry", "prometheus", + "rand", "tokio", "tracing", "tracing-opentelemetry", diff --git a/sartd/src/bgp/Cargo.lock b/sartd/src/bgp/Cargo.lock index c2024c4..702af96 100644 --- a/sartd/src/bgp/Cargo.lock +++ b/sartd/src/bgp/Cargo.lock @@ -1441,6 +1441,7 @@ dependencies = [ "kube", "opentelemetry", "prometheus", + "rand", "tokio", "tracing", "tracing-opentelemetry", diff --git a/sartd/src/cmd/Cargo.lock b/sartd/src/cmd/Cargo.lock index 109a7d4..92a18df 100644 --- a/sartd/src/cmd/Cargo.lock +++ b/sartd/src/cmd/Cargo.lock @@ -2406,6 +2406,7 @@ dependencies = [ "kube", "opentelemetry", "prometheus", + "rand", "tokio", "tracing", "tracing-opentelemetry", diff --git a/sartd/src/fib/Cargo.lock b/sartd/src/fib/Cargo.lock index 41c3788..7669e46 100644 --- a/sartd/src/fib/Cargo.lock +++ b/sartd/src/fib/Cargo.lock @@ -1485,6 +1485,7 @@ dependencies = [ "kube", "opentelemetry", "prometheus", + "rand", "tokio", "tracing", "tracing-opentelemetry", diff --git a/sartd/src/kubernetes/Cargo.lock b/sartd/src/kubernetes/Cargo.lock index eb35e95..1dac080 100644 --- a/sartd/src/kubernetes/Cargo.lock +++ b/sartd/src/kubernetes/Cargo.lock @@ -2305,6 +2305,7 @@ dependencies = [ "kube", "opentelemetry", "prometheus", + "rand", "tokio", "tracing", "tracing-opentelemetry", diff --git a/sartd/src/kubernetes/src/agent.rs b/sartd/src/kubernetes/src/agent.rs index 189c420..cf3856c 100644 --- a/sartd/src/kubernetes/src/agent.rs +++ b/sartd/src/kubernetes/src/agent.rs @@ -1,6 +1,8 @@ mod bgp; pub mod cni; pub mod config; +pub mod context; pub mod error; +pub mod metrics; pub mod reconciler; pub mod server; diff --git a/sartd/src/kubernetes/src/agent/cni/server.rs b/sartd/src/kubernetes/src/agent/cni/server.rs index bb645d4..7491e09 100644 --- a/sartd/src/kubernetes/src/agent/cni/server.rs +++ b/sartd/src/kubernetes/src/agent/cni/server.rs @@ -30,10 +30,13 @@ use tokio_stream::wrappers::UnixListenerStream; use tonic::{async_trait, transport::Server, Request, Response, Status}; use crate::{ - agent::cni::{ - netlink::{self, ContainerLinkInfo}, - netns::{self, get_current_netns, NetNS}, - pod::{cleanup_links, setup_links}, + agent::{ + cni::{ + netlink::{self, ContainerLinkInfo}, + netns::{self, get_current_netns, NetNS}, + pod::{cleanup_links, setup_links}, + }, + metrics::Metrics, }, crd::{ address_block::{AddressBlock, ADDRESS_BLOCK_NODE_LABEL}, @@ -66,6 +69,7 @@ struct 
CNIServerInner { pub struct CNIServer { // consider some better type. inner: Arc<Mutex<CNIServerInner>>, + metrics: Arc<Mutex<Metrics>>, } impl CNIServer { @@ -76,11 +80,13 @@ impl CNIServer { node_addr: IpAddr, table: u32, receiver: UnboundedReceiver<AddressBlock>, + metrics: Arc<Mutex<Metrics>>, ) -> CNIServer { CNIServer { inner: Arc::new(Mutex::new(CNIServerInner::new( client, allocator, node, node_addr, table, receiver, ))), + metrics, } } @@ -135,7 +141,7 @@ impl CNIServerInner { let pod = pod_api.get(&pod_info.name).await.map_err(Error::Kube)?; let pool = self.get_pool(&pod_info, &pod).await?; - if self.allocation.get(&pod_key).is_some() { + if self.allocation.contains_key(&pod_key) { return Err(Error::AlreadyConfigured(pod_key)); } @@ -791,7 +797,12 @@ impl CniApi for CNIServer { let args = req.get_ref(); let mut inner = self.inner.lock().await; match inner.add(args).await { - Ok(res) => Ok(Response::new(res)), + Ok(res) => { + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_call("add"); + } + Ok(Response::new(res)) + } Err(e) => { tracing::error!(error=?e, "Failed to add"); let cni_err = rscni::error::Error::from(e); @@ -800,6 +811,9 @@ msg: cni_err.to_string(), details: cni_err.details(), }; + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_errors("add", &format!("{}", u32::from(&cni_err))); + } let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { @@ -834,7 +848,12 @@ impl CniApi for CNIServer { let args = req.get_ref(); let mut inner = self.inner.lock().await; match inner.del(args).await { - Ok(res) => Ok(Response::new(res)), + Ok(res) => { + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_call("del"); + } + Ok(Response::new(res)) + } Err(e) => { tracing::error!(error=?e, "Failed to delete"); let cni_err = rscni::error::Error::from(e); @@ -843,6 +862,9 @@ msg: cni_err.to_string(), details: cni_err.details(), }; + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_errors("del", &format!("{}", u32::from(&cni_err))); + } let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { @@ -877,7 +899,12 @@ impl CniApi for CNIServer { let args = req.get_ref(); let inner = self.inner.lock().await; match inner.check(args).await { - Ok(res) => Ok(Response::new(res)), + Ok(res) => { + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_call("check"); + } + Ok(Response::new(res)) + } Err(e) => { tracing::error!(error=?e, "Failed to check"); let cni_err = rscni::error::Error::from(e); @@ -886,6 +913,9 @@ msg: cni_err.to_string(), details: cni_err.details(), }; + if let Ok(metrics) = self.metrics.lock() { + metrics.cni_errors("check", &format!("{}", u32::from(&cni_err))); + } let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { @@ -1033,16 +1063,23 @@ impl From<Error> for rscni::error::Error { #[cfg(test)] mod tests { - use std::{net::IpAddr, str::FromStr, sync::Arc}; + use std::{ + net::IpAddr, + str::FromStr, + sync::{Arc, Mutex}, + }; use ipnet::IpNet; use kube::Client; use sartd_ipam::manager::{AllocatorSet, Block}; use tokio::sync::mpsc::unbounded_channel; - use crate::agent::cni::{ - pod::{PodAllocation, PodInfo}, - server::{CNIServerInner, CNI_ROUTE_TABLE_ID}, + use crate::agent::{ + cni::{ + pod::{PodAllocation, PodInfo}, + server::{CNIServerInner, CNI_ROUTE_TABLE_ID}, + }, + metrics::Metrics, }; #[tokio::test] diff --git a/sartd/src/kubernetes/src/agent/context.rs b/sartd/src/kubernetes/src/agent/context.rs new file mode 100644 index 0000000..f2411d9 --- /dev/null +++
b/sartd/src/kubernetes/src/agent/context.rs @@ -0,0 +1,159 @@ +use std::{sync::{Arc, Mutex}, time::Duration}; + +use chrono::{DateTime, Utc}; +pub use kube::{ + core::{DynamicResourceScope, ObjectMeta}, + runtime::{ + controller::Action, + events::{Recorder, Reporter}, + }, + Client, Resource, +}; +use serde::Serialize; +use tokio::sync::RwLock; + +use sartd_trace::error::TraceableError; + +use super::metrics::Metrics; + +pub trait Ctx { + fn metrics(&self) -> Arc<Mutex<Metrics>>; + fn client(&self) -> &Client; +} + +// Context for our reconciler +#[derive(Clone)] +pub struct Context { + // Kubernetes client + pub client: Client, + // Reconcile interval + pub interval: u64, + // Diagnostics read by the web server + pub diagnostics: Arc<RwLock<Diagnostics>>, + // Prometheus metrics + pub metrics: Arc<Mutex<Metrics>>, +} + +impl Ctx for Context { + fn client(&self) -> &Client { + &self.client + } + + fn metrics(&self) -> Arc<Mutex<Metrics>> { + self.metrics.clone() + } +} + +pub struct ContextWith<T> { + pub(crate) inner: Context, + pub component: T, +} + +impl<T> Ctx for ContextWith<T> { + fn client(&self) -> &Client { + &self.inner.client + } + + fn metrics(&self) -> Arc<Mutex<Metrics>> { + self.inner.metrics.clone() + } +} + +#[derive(Debug, Clone, Default)] +pub struct State { + pub diagnostics: Arc<RwLock<Diagnostics>>, + pub registry: prometheus::Registry, +} + +impl State { + pub fn new(component: &str) -> State { + State { + diagnostics: Arc::new(RwLock::new(Diagnostics::new(component.to_string()))), + registry: prometheus::Registry::default(), + } + } + /// Metrics getter + pub fn metrics(&self) -> Vec<prometheus::proto::MetricFamily> { + self.registry.gather() + } + + /// State getter + pub async fn diagnostics(&self) -> Diagnostics { + self.diagnostics.read().await.clone() + } + + // Create a Controller Context that can update State + pub fn to_context(&self, client: Client, interval: u64, metrics: Arc<Mutex<Metrics>>) -> Arc<Context> { + Arc::new(Context { + client, + interval, + metrics, + diagnostics: self.diagnostics.clone(), + }) + } + pub fn to_context_with<T>( + &self, + client: Client, + interval: u64, + component: T, + metrics: Arc<Mutex<Metrics>>, + ) -> Arc<ContextWith<T>> { + Arc::new(ContextWith { + inner: Context { + client, + interval, + diagnostics: self.diagnostics.clone(), + metrics, + }, + component, + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct Diagnostics { + #[serde(deserialize_with = "from_ts")] + pub last_event: DateTime<Utc>, + #[serde(skip)] + pub reporter: Reporter, +} + +impl Diagnostics { + pub fn new(component: String) -> Self { + Self { + last_event: Utc::now(), + reporter: component.into(), + } + } +} + +impl Default for Diagnostics { + fn default() -> Self { + Self { + last_event: Utc::now(), + reporter: "sart".into(), + } + } +} + +impl Diagnostics { + fn recorder<T: Resource<DynamicType = ()>>( + &self, + client: Client, + res: T, + ) -> Recorder { + Recorder::new(client, self.reporter.clone(), res.object_ref(&())) + } +} + +#[tracing::instrument(skip_all)] +pub fn error_policy<T: Resource<DynamicType = ()>, E: TraceableError, C: Ctx>( + resource: Arc<T>, + error: &E, + ctx: Arc<C>, +) -> Action { + tracing::warn!("reconcile failed: {:?}", error); + let metrics = ctx.metrics(); + metrics.lock().unwrap().reconcile_failure(resource.as_ref()); + Action::requeue(Duration::from_secs(10)) +} diff --git a/sartd/src/kubernetes/src/agent/metrics.rs b/sartd/src/kubernetes/src/agent/metrics.rs new file mode 100644 index 0000000..da053f6 --- /dev/null +++ b/sartd/src/kubernetes/src/agent/metrics.rs @@ -0,0 +1,171 @@ +use kube::Resource; +use prometheus::{histogram_opts, opts, HistogramVec, IntCounterVec}; +use prometheus::{IntGaugeVec, Registry}; +use tokio::time::Instant; + +#[derive(Debug, Clone)] +pub
struct Metrics { + pub reconciliations: IntCounterVec, + pub failures: IntCounterVec, + pub reconcile_duration: HistogramVec, + pub cni_call: IntCounterVec, + pub cni_errors: IntCounterVec, + pub bgp_peer_status: IntGaugeVec, + pub node_bgp_status: IntGaugeVec, + pub node_bgp_backoff_count: IntCounterVec, +} + +impl Default for Metrics { + fn default() -> Self { + let reconcile_duration = HistogramVec::new( + histogram_opts!( + "sart_agent_reconcile_duration_seconds", + "The duration of reconcile to complete in seconds" + ) + .buckets(vec![0.01, 0.1, 0.25, 0.5, 1., 5., 15., 60.]), + &[], + ) + .unwrap(); + let failures = IntCounterVec::new( + opts!( + "sart_agent_reconciliation_errors_total", + "Total count of reconciliation errors", + ), + &["resource", "instance"], + ) + .unwrap(); + let reconciliations = IntCounterVec::new( + opts!( + "sart_agent_reconciliation_total", + "Total count of reconciliations", + ), + &["resource", "instance"], + ) + .unwrap(); + let cni_call = IntCounterVec::new( + opts!("sart_agent_cni_call_total", "Total count of CNI calls",), + &["method"], + ) + .unwrap(); + let cni_errors = IntCounterVec::new( + opts!( + "sart_agent_cni_call_errors_total", + "Total count of CNI call errors" + ), + &["method", "code"], + ) + .unwrap(); + let bgp_peer_status = IntGaugeVec::new( + opts!("sart_agent_bgp_peer_status", "BGP peer status"), + &["peer", "status"], + ) + .unwrap(); + let node_bgp_status = IntGaugeVec::new( + opts!("sart_agent_node_bgp_status", "Node BGP status"), + &["name", "status"], + ) + .unwrap(); + let node_bgp_backoff_count = IntCounterVec::new( + opts!( + "sart_agent_node_bgp_backoff_count_total", + "Total count of NodeBGP backoffs" + ), + &["name"], + ) + .unwrap(); + Metrics { + reconciliations, + failures, + reconcile_duration, + cni_call, + cni_errors, + bgp_peer_status, + node_bgp_status, + node_bgp_backoff_count, + } + } +} + +impl Metrics { + pub fn register(self, registry: &Registry) -> Result<Self, prometheus::Error> { + registry.register(Box::new(self.reconciliations.clone()))?; + registry.register(Box::new(self.failures.clone()))?; + registry.register(Box::new(self.reconcile_duration.clone()))?; + registry.register(Box::new(self.cni_call.clone()))?; + registry.register(Box::new(self.cni_errors.clone()))?; + registry.register(Box::new(self.bgp_peer_status.clone()))?; + registry.register(Box::new(self.node_bgp_status.clone()))?; + registry.register(Box::new(self.node_bgp_backoff_count.clone()))?; + Ok(self) + } + + pub fn reconcile_failure<T: Resource<DynamicType = ()>>(&self, resource: &T) { + self.failures + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } + + pub fn reconciliation<T: Resource<DynamicType = ()>>(&self, resource: &T) { + self.reconciliations + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } + + pub fn cni_call(&self, method: &str) { + self.cni_call.with_label_values(&[method]).inc() + } + + pub fn cni_errors(&self, method: &str, code: &str) { + self.cni_errors.with_label_values(&[method, code]).inc() + } + + pub fn bgp_peer_status_up(&self, peer: &str, status: &str) { + self.bgp_peer_status + .with_label_values(&[peer, status]) + .set(1) + } + + pub fn bgp_peer_status_down(&self, peer: &str, status: &str) { + self.bgp_peer_status + .with_label_values(&[peer, status]) + .set(0) + } + + pub fn node_bgp_status_up(&self, name: &str, status: &str) { + self.node_bgp_status + .with_label_values(&[name, status]) + .set(1) + } + + pub fn node_bgp_status_down(&self, name: &str,
status: &str) { + self.node_bgp_status + .with_label_values(&[name, status]) + .set(0) + } + + pub fn node_bgp_backoff_count(&self, name: &str) { + self.node_bgp_backoff_count.with_label_values(&[name]).inc() + } +} + +/// Smart function duration measurer +/// +/// Relies on Drop to calculate duration and register the observation in the histogram +pub struct ReconcileMeasurer { + start: Instant, + metric: HistogramVec, +} + +impl Drop for ReconcileMeasurer { + fn drop(&mut self) { + #[allow(clippy::cast_precision_loss)] + let duration = self.start.elapsed().as_millis() as f64 / 1000.0; + self.metric.with_label_values(&[]).observe(duration); + } +} diff --git a/sartd/src/kubernetes/src/agent/reconciler/address_block.rs b/sartd/src/kubernetes/src/agent/reconciler/address_block.rs index ea9d72e..c260280 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/address_block.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/address_block.rs @@ -1,9 +1,9 @@ -use std::{collections::BTreeMap, str::FromStr, sync::Arc, time::Duration}; +use std::{collections::BTreeMap, str::FromStr, sync::{Arc, Mutex}, time::Duration}; use futures::StreamExt; use ipnet::IpNet; use kube::{ - api::{DeleteParams, ListParams, PatchParams, PostParams}, + api::{DeleteParams, ListParams, PostParams}, core::ObjectMeta, runtime::{ controller::Action, @@ -17,8 +17,12 @@ use sartd_ipam::manager::{AllocatorSet, Block}; use tokio::sync::mpsc::UnboundedSender; use crate::{ - agent::{error::Error, reconciler::node_bgp::ENV_HOSTNAME}, - context::{error_policy, ContextWith, Ctx, State}, + agent::{ + context::{error_policy, ContextWith, Ctx, State}, + error::Error, + metrics::Metrics, + reconciler::node_bgp::ENV_HOSTNAME, + }, crd::{ address_block::{AddressBlock, ADDRESS_BLOCK_FINALIZER_AGENT, ADDRESS_BLOCK_NODE_LABEL}, address_pool::{AddressType, ADDRESS_POOL_ANNOTATION}, @@ -41,6 +45,8 @@ pub async fn reconciler( ab: Arc<AddressBlock>, ctx: Arc<ContextWith<Arc<PodAllocator>>>, ) -> Result<Action, Error> { + + ctx.metrics().lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(ab.as_ref()); // handle only Pod type if ab.spec.r#type.ne(&AddressType::Pod) { return Ok(Action::await_change()); @@ -282,7 +288,12 @@ async fn cleanup( Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64, pod_allocator: Arc<PodAllocator>) { +pub async fn run( + state: State, + interval: u64, + pod_allocator: Arc<PodAllocator>, + metrics: Arc<Mutex<Metrics>>, +) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -304,7 +315,7 @@ pub async fn run(state: State, interval: u64, pod_allocator: Arc<PodAllocator>) .run( reconciler, error_policy::<AddressBlock, Error, ContextWith<Arc<PodAllocator>>>, - state.to_context_with::<Arc<PodAllocator>>(client, interval, pod_allocator), + state.to_context_with::<Arc<PodAllocator>>(client, interval, pod_allocator, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs b/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs index 7182d89..85ce401 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs @@ -1,4 +1,4 @@ -use std::{sync::Arc, time::Duration}; +use std::{sync::{Arc, Mutex}, time::Duration}; use futures::StreamExt; use kube::{ @@ -9,8 +9,12 @@ use kube::{ use tracing::{field, Span}; use crate::{ - agent::{bgp::speaker, error::Error}, - context::{error_policy, Context, State}, + agent::{ + bgp::speaker, + context::{error_policy, Context, State, Ctx}, + error::Error, + metrics::Metrics, + }, crd::{
bgp_advertisement::{AdvertiseStatus, BGPAdvertisement, Protocol}, bgp_peer::{BGPPeer, BGPPeerConditionStatus}, @@ -22,7 +26,7 @@ use crate::{ use super::node_bgp::{DEFAULT_SPEAKER_TIMEOUT, ENV_HOSTNAME}; #[tracing::instrument(skip_all, fields(trace_id))] -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc<Mutex<Metrics>>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -47,7 +51,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::<BGPAdvertisement, Error, Context>, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) @@ -58,6 +62,8 @@ pub async fn run(state: State, interval: u64) { pub async fn reconciler(ba: Arc<BGPAdvertisement>, ctx: Arc<Context>) -> Result<Action, Error> { let ns = get_namespace::<BGPAdvertisement>(&ba).map_err(Error::KubeLibrary)?; + ctx.metrics().lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(ba.as_ref()); + let bgp_advertisements = Api::<BGPAdvertisement>::namespaced(ctx.client.clone(), &ns); reconcile(&bgp_advertisements, &ba, ctx).await diff --git a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs index ada97af..87eae40 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs @@ -1,9 +1,12 @@ -use std::{sync::Arc, time::Duration}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; use futures::StreamExt; use k8s_openapi::api::discovery::v1::EndpointSlice; use kube::{ - api::{ListParams, PostParams}, + api::{ListParams, Patch, PatchParams, PostParams}, runtime::{ controller::Action, finalizer::{finalizer, Event}, @@ -15,8 +18,12 @@ use kube::{ use tracing::{field, Span}; use crate::{ - agent::{bgp::speaker, error::Error}, - context::{error_policy, Context, State}, + agent::{ + bgp::speaker, + context::{error_policy, Context, Ctx, State}, + error::Error, + metrics::Metrics, + }, controller::reconciler::endpointslice_watcher::ENDPOINTSLICE_TRIGGER, crd::{ bgp_advertisement::{AdvertiseStatus, BGPAdvertisement}, @@ -35,6 +42,11 @@ use super::node_bgp::{DEFAULT_SPEAKER_TIMEOUT, ENV_HOSTNAME}; pub async fn reconciler(bp: Arc<BGPPeer>, ctx: Arc<Context>) -> Result<Action, Error> { let bgp_peers = Api::<BGPPeer>::all(ctx.client.clone()); + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .reconciliation(bp.as_ref()); + finalizer(&bgp_peers, BGP_PEER_FINALIZER, bp, |event| async { match event { Event::Apply(bp) => reconcile(&bgp_peers, &bp, ctx).await, @@ -113,6 +125,21 @@ async fn reconcile(api: &Api<BGPPeer>, bp: &BGPPeer, ctx: Arc<Context>) -> Resul new_state = ?new_state, "peer state is changed" ); + // update metrics + { + let metrics = ctx.metrics(); + let metrics = metrics + .lock() + .map_err(|_| Error::FailedToGetLock)?; + metrics.bgp_peer_status_down( + &bp.name_any(), + &format!("{}", cond.status), + ); + metrics.bgp_peer_status_up( + &bp.name_any(), + &format!("{new_state}"), + ); + } conditions.push(BGPPeerCondition { status: BGPPeerConditionStatus::try_from(status as i32) .map_err(Error::CRD)?, @@ -153,6 +180,10 @@ async fn reconcile(api: &Api<BGPPeer>, bp: &BGPPeer, ctx: Arc<Context>) -> Resul status: state, reason: "Synchronized by BGPPeer reconciler".to_string(), }]); + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)?
+ .bgp_peer_status_up(&bp.name_any(), &format!("{state}")); need_status_update = true; } }, @@ -173,6 +204,10 @@ async fn reconcile(api: &Api<BGPPeer>, bp: &BGPPeer, ctx: Arc<Context>) -> Resul reason: "Synchronized by BGPPeer reconciler".to_string(), }]), }); + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .bgp_peer_status_up(&bp.name_any(), &format!("{state}")); need_status_update = true; } } @@ -416,7 +451,7 @@ async fn cleanup(_api: &Api<BGPPeer>, bp: &BGPPeer, ctx: Arc<Context>) -> Result } #[tracing::instrument(skip_all, fields(trace_id))] -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc<Mutex<Metrics>>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -439,7 +474,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::<BGPPeer, Error, Context>, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer_watcher.rs b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer_watcher.rs index 14007b8..0ea789b 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer_watcher.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer_watcher.rs @@ -1,3 +1,5 @@ +use std::sync::{Arc, Mutex}; + use k8s_openapi::api::discovery::v1::EndpointSlice; use kube::{ api::{ListParams, Patch, PatchParams, PostParams}, @@ -10,7 +12,7 @@ use sartd_proto::sart::{ use tonic::{transport::Server, Request, Response, Status}; use crate::{ - agent::error::Error, + agent::{error::Error, metrics::Metrics}, controller::reconciler::endpointslice_watcher::ENDPOINTSLICE_TRIGGER, crd::{ bgp_advertisement::{AdvertiseStatus, BGPAdvertisement}, @@ -22,12 +24,17 @@ use crate::{ pub struct BGPPeerStateWatcher { pub client: Client, pub api: Api<BGPPeer>, + pub metrics: Arc<Mutex<Metrics>>, } impl BGPPeerStateWatcher { - pub fn new(client: Client) -> Self { + pub fn new(client: Client, metrics: Arc<Mutex<Metrics>>) -> Self { let api = Api::<BGPPeer>::all(client.clone()); - Self { client, api } + Self { + client, + api, + metrics, + } } } @@ -71,6 +78,14 @@ impl BgpExporterApi for BGPPeerStateWatcher { status: state, reason: "Synchronized by watcher".to_string(), }); + if let Ok(metrics) = self.metrics.lock() { + metrics.bgp_peer_status_down( + &new_bp.name_any(), + &format!("{}", old_status), + ); + metrics + .bgp_peer_status_up(&new_bp.name_any(), &format!("{}", state)); + } if state.eq(&BGPPeerConditionStatus::Established) { established = true; } @@ -87,6 +102,9 @@ impl BgpExporterApi for BGPPeerStateWatcher { status: state, reason: "Synchronized by watcher".to_string(), }); + if let Ok(metrics) = self.metrics.lock() { + metrics.bgp_peer_status_up(&new_bp.name_any(), &format!("{}", state)); + } if state.eq(&BGPPeerConditionStatus::Established) { established = true; } @@ -97,6 +115,9 @@ status: state, reason: "Synchronized by watcher".to_string(), }]); + if let Ok(metrics) = self.metrics.lock() { + metrics.bgp_peer_status_up(&new_bp.name_any(), &format!("{}", state)); + } if state.eq(&BGPPeerConditionStatus::Established) { established = true; } @@ -110,6 +131,9 @@ reason: "Synchronized by watcher".to_string(), }]), }); + if let Ok(metrics) = self.metrics.lock() { + metrics.bgp_peer_status_up(&new_bp.name_any(), &format!("{}", state)); + } if state.eq(&BGPPeerConditionStatus::Established) { established = true; } @@ -235,7
+259,7 @@ impl BgpExporterApi for BGPPeerStateWatcher { } #[tracing::instrument()] -pub async fn run(endpoint: &str) { +pub async fn run(endpoint: &str, metrics: Arc<Mutex<Metrics>>) { let client = Client::try_default() .await .expect("Failed to create kube config"); @@ -245,7 +269,9 @@ pub async fn run(endpoint: &str) { tracing::info!("Peer state watcher is started at {}", endpoint); Server::builder() - .add_service(BgpExporterApiServer::new(BGPPeerStateWatcher::new(client))) + .add_service(BgpExporterApiServer::new(BGPPeerStateWatcher::new( + client, metrics, + ))) .serve(sock_addr) .await .unwrap(); diff --git a/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs b/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs index c8387f8..8c384ce 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs @@ -1,4 +1,7 @@ -use std::{sync::Arc, time::Duration}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; use futures::StreamExt; @@ -14,8 +17,12 @@ use kube::{ use tracing::{field, Span}; use crate::{ - agent::{bgp::speaker, error::Error}, - context::{error_policy, Context, State}, + agent::{ + bgp::speaker, + context::{error_policy, Context, Ctx, State}, + error::Error, + metrics::Metrics, + }, crd::{ bgp_advertisement::{AdvertiseStatus, BGPAdvertisement}, bgp_peer::{BGPPeer, BGPPeerCondition, BGPPeerConditionStatus}, @@ -34,6 +41,11 @@ pub const DEFAULT_SPEAKER_TIMEOUT: u64 = 10; pub async fn reconciler(nb: Arc<NodeBGP>, ctx: Arc<Context>) -> Result<Action, Error> { let node_bgps = Api::<NodeBGP>::all(ctx.client.clone()); + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .reconciliation(nb.as_ref()); + finalizer(&node_bgps, NODE_BGP_FINALIZER, nb, |event| async { match event { Event::Apply(nb) => reconcile(&node_bgps, &nb, ctx).await, @@ -78,6 +90,19 @@ async fn reconcile(api: &Api<NodeBGP>, nb: &NodeBGP, ctx: Arc<Context>) -> Resul ) .await .map_err(Error::Kube)?; + { + let metrics = ctx.metrics(); + let metrics = metrics.lock().map_err(|_| Error::FailedToGetLock)?; + metrics.node_bgp_status_up( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Unavailable), + ); + metrics.node_bgp_status_down( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Available), + ); + metrics.node_bgp_backoff_count(&nb.name_any()); + } } tracing::warn!( name = nb.name_any(), @@ -111,6 +136,19 @@ async fn reconcile(api: &Api<NodeBGP>, nb: &NodeBGP, ctx: Arc<Context>) -> Resul router_id = nb.spec.router_id, "backoff NodeBGP" ); + { + let metrics = ctx.metrics(); + let metrics = metrics.lock().map_err(|_| Error::FailedToGetLock)?; + metrics.node_bgp_status_up( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Unavailable), + ); + metrics.node_bgp_status_down( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Available), + ); + metrics.node_bgp_backoff_count(&nb.name_any()); + } backoff_advertisements(nb, &ctx.client.clone()).await?; tracing::warn!( name = nb.name_any(), @@ -198,6 +236,18 @@ async fn reconcile(api: &Api<NodeBGP>, nb: &NodeBGP, ctx: Arc<Context>) -> Resul ) .await .map_err(Error::Kube)?; + { + let metrics = ctx.metrics(); + let metrics = metrics.lock().map_err(|_| Error::FailedToGetLock)?; + metrics.node_bgp_status_down( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Unavailable), + ); + metrics.node_bgp_status_up( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Available), + ); + } // add peer's backoff count let bgp_peer_api = Api::<BGPPeer>::all(ctx.client.clone()); @@ -267,6 +317,26 @@ async fn reconcile(api: &Api<NodeBGP>, nb: &NodeBGP, ctx: Arc<Context>) -> Resul reason:
NodeBGPConditionReason::InvalidConfiguration, } }; + { + let metrics = ctx.metrics(); + let metrics = metrics.lock().map_err(|_| Error::FailedToGetLock)?; + match cond.status { + NodeBGPConditionStatus::Available => { + metrics.node_bgp_status_up(&nb.name_any(), &format!("{}", cond.status)); + metrics.node_bgp_status_down( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Unavailable), + ); + } + NodeBGPConditionStatus::Unavailable => { + metrics.node_bgp_status_up(&nb.name_any(), &format!("{}", cond.status)); + metrics.node_bgp_status_down( + &nb.name_any(), + &format!("{}", NodeBGPConditionStatus::Available), + ); + } + } + } let mut need_status_update = true; let mut new_nb = nb.clone(); @@ -400,7 +470,7 @@ async fn cleanup(_api: &Api, nb: &NodeBGP, _ctx: Arc) -> Resul } #[tracing::instrument(skip_all)] -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -425,7 +495,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/agent/server.rs b/sartd/src/kubernetes/src/agent/server.rs index a5650a5..cef5e25 100644 --- a/sartd/src/kubernetes/src/agent/server.rs +++ b/sartd/src/kubernetes/src/agent/server.rs @@ -1,5 +1,5 @@ -use std::net::IpAddr; use std::sync::Arc; +use std::{net::IpAddr, sync::Mutex}; use actix_web::{ get, middleware, web::Data, App, HttpRequest, HttpResponse, HttpServer, Responder, @@ -13,9 +13,16 @@ use sartd_ipam::manager::AllocatorSet; use sartd_trace::init::{prepare_tracing, TraceConfig}; use tokio::sync::mpsc::unbounded_channel; -use crate::agent::{cni::{self, server::{CNIServer, CNI_ROUTE_TABLE_ID}}, reconciler::address_block::PodAllocator}; +use crate::agent::{ + cni::{ + self, + server::{CNIServer, CNI_ROUTE_TABLE_ID}, + }, + context::State, + metrics::Metrics, + reconciler::address_block::PodAllocator, +}; use crate::config::Mode; -use crate::context::State; use crate::crd::address_block::AddressBlock; use super::config::Config; @@ -80,23 +87,32 @@ async fn run(a: Agent, trace_config: TraceConfig) { tracing::info!("Start Agent Reconcilers"); + let metrics = Arc::new(Mutex::new( + Metrics::default().register(&state.registry).unwrap(), + )); + let node_bgp_state = state.clone(); + let nb_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::node_bgp::run(node_bgp_state, a.requeue_interval).await; + reconciler::node_bgp::run(node_bgp_state, a.requeue_interval, nb_metrics).await; }); let bgp_peer_state = state.clone(); + let bp_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::bgp_peer::run(bgp_peer_state, a.requeue_interval).await; + reconciler::bgp_peer::run(bgp_peer_state, a.requeue_interval, bp_metrics).await; }); let bgp_advertisement_state = state.clone(); + let ba_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::bgp_advertisement::run(bgp_advertisement_state, a.requeue_interval).await; + reconciler::bgp_advertisement::run(bgp_advertisement_state, a.requeue_interval, ba_metrics) + .await; }); + let pw_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::bgp_peer_watcher::run(&a.peer_state_watcher).await; + reconciler::bgp_peer_watcher::run(&a.peer_state_watcher, pw_metrics).await; }); if 
a.mode.eq(&Mode::CNI) || a.mode.eq(&Mode::Dual) { @@ -109,18 +125,21 @@ async fn run(a: Agent, trace_config: TraceConfig) { }); let address_block_state = state.clone(); + let ab_metrics = metrics.clone(); let ab_pod_allocator = pod_allocator.clone(); tokio::spawn(async move { reconciler::address_block::run( address_block_state, a.requeue_interval, ab_pod_allocator, + ab_metrics, ) .await; }); let node_name = std::env::var("HOSTNAME").unwrap(); // get node internal ip + let cni_metrics = metrics.clone(); let node_addr = get_node_addr(&node_name).await; let kube_client = Client::try_default().await.unwrap(); let cni_server = CNIServer::new( @@ -130,6 +149,7 @@ async fn run(a: Agent, trace_config: TraceConfig) { node_addr, CNI_ROUTE_TABLE_ID, receiver, + cni_metrics, ); let cni_endpoint = a.cni_endpoint.expect("cni endpoint must be given"); diff --git a/sartd/src/kubernetes/src/context.rs b/sartd/src/kubernetes/src/context.rs index 5f95c5b..fd4295b 100644 --- a/sartd/src/kubernetes/src/context.rs +++ b/sartd/src/kubernetes/src/context.rs @@ -1,4 +1,4 @@ -use std::{sync::Arc, time::Duration}; +use std::{sync::{Arc, Mutex}, time::Duration}; use chrono::{DateTime, Utc}; pub use kube::{ @@ -12,10 +12,12 @@ pub use kube::{ use serde::Serialize; use tokio::sync::RwLock; -use sartd_trace::{error::TraceableError, metrics::Metrics}; +use sartd_trace::error::TraceableError; + +use crate::metrics::Metrics; pub trait Ctx { - fn metrics(&self) -> &Metrics; + fn metrics(&self) -> Arc>; fn client(&self) -> &Client; } @@ -29,7 +31,7 @@ pub struct Context { // Diagnostics read by the web server pub diagnostics: Arc>, // Prometheus metrics - pub metrics: Metrics, + pub metrics: Arc>, } impl Ctx for Context { @@ -37,8 +39,8 @@ impl Ctx for Context { &self.client } - fn metrics(&self) -> &Metrics { - &self.metrics + fn metrics(&self) -> Arc> { + self.metrics.clone() } } @@ -52,8 +54,8 @@ impl Ctx for ContextWith { &self.inner.client } - fn metrics(&self) -> &Metrics { - &self.inner.metrics + fn metrics(&self) -> Arc> { + self.inner.metrics.clone() } } @@ -81,11 +83,11 @@ impl State { } // Create a Controller Context that can update State - pub fn to_context(&self, client: Client, interval: u64) -> Arc { + pub fn to_context(&self, client: Client, interval: u64, metrics: Arc>) -> Arc { Arc::new(Context { client, interval, - metrics: Metrics::default().register(&self.registry).unwrap(), + metrics, diagnostics: self.diagnostics.clone(), }) } @@ -94,13 +96,14 @@ impl State { client: Client, interval: u64, component: T, + metrics: Arc>, ) -> Arc> { Arc::new(ContextWith { inner: Context { client, interval, diagnostics: self.diagnostics.clone(), - metrics: Metrics::default().register(&self.registry).unwrap(), + metrics, }, component, }) @@ -150,6 +153,7 @@ pub fn error_policy, E: TraceableError, C: Ctx>( ctx: Arc, ) -> Action { tracing::warn!("reconcile failed: {:?}", error); - ctx.metrics().reconcile_failure(resource.as_ref(), error); + let metrics = ctx.metrics(); + metrics.lock().unwrap().reconcile_failure(resource.as_ref()); Action::requeue(Duration::from_secs(10)) } diff --git a/sartd/src/kubernetes/src/controller.rs b/sartd/src/kubernetes/src/controller.rs index d98ddcf..e709387 100644 --- a/sartd/src/kubernetes/src/controller.rs +++ b/sartd/src/kubernetes/src/controller.rs @@ -1,5 +1,7 @@ pub mod config; +pub mod context; pub mod error; +pub mod metrics; pub mod reconciler; pub mod server; pub mod webhook; diff --git a/sartd/src/kubernetes/src/controller/context.rs b/sartd/src/kubernetes/src/controller/context.rs 
new file mode 100644 index 0000000..5b673c4 --- /dev/null +++ b/sartd/src/kubernetes/src/controller/context.rs @@ -0,0 +1,198 @@ +use std::{sync::{Arc, Mutex}, time::Duration}; + +use chrono::{DateTime, Utc}; +use http::{Request, Response}; +use hyper::Body; +pub use kube::{ + core::DynamicResourceScope, + runtime::{ + controller::Action, + events::{Recorder, Reporter}, + }, + Client, Resource, +}; +use prometheus::Registry; +use serde::Serialize; +use tokio::sync::RwLock; + +use sartd_trace::error::TraceableError; + +use crate::fixture::reconciler::ApiServerVerifier; + +use super::metrics::Metrics; + +pub trait Ctx { + fn metrics(&self) -> Arc>; + fn client(&self) -> &Client; +} + +// Context for our reconciler +#[derive(Clone)] +pub struct Context { + // Kubernetes client + pub client: Client, + // Reconcile interval + pub interval: u64, + // Diagnostics read by the web server + pub diagnostics: Arc>, + // Prometheus metrics + pub metrics: Arc>, +} + +impl Ctx for Context { + fn client(&self) -> &Client { + &self.client + } + + fn metrics(&self) -> Arc> { + self.metrics.clone() + } +} + +pub struct ContextWith { + pub(crate) inner: Context, + pub component: T, +} + +impl Ctx for ContextWith { + fn client(&self) -> &Client { + &self.inner.client + } + + fn metrics(&self) -> Arc> { + self.inner.metrics.clone() + } +} + +#[derive(Debug, Clone, Default)] +pub struct State { + pub diagnostics: Arc>, + pub registry: prometheus::Registry, +} + +impl State { + pub fn new(component: &str) -> State { + State { + diagnostics: Arc::new(RwLock::new(Diagnostics::new(component.to_string()))), + registry: prometheus::Registry::default(), + } + } + /// Metrics getter + pub fn metrics(&self) -> Vec { + self.registry.gather() + } + + /// State getter + pub async fn diagnostics(&self) -> Diagnostics { + self.diagnostics.read().await.clone() + } + + // Create a Controller Context that can update State + pub fn to_context(&self, client: Client, interval: u64, metrics: Arc>) -> Arc { + Arc::new(Context { + client, + interval, + metrics, + diagnostics: self.diagnostics.clone(), + }) + } + pub fn to_context_with( + &self, + client: Client, + interval: u64, + component: T, + metrics: Arc>, + ) -> Arc> { + Arc::new(ContextWith { + inner: Context { + client, + interval, + diagnostics: self.diagnostics.clone(), + metrics, + }, + component, + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct Diagnostics { + #[serde(deserialize_with = "from_ts")] + pub last_event: DateTime, + #[serde(skip)] + pub reporter: Reporter, +} + +impl Diagnostics { + pub fn new(component: String) -> Self { + Self { + last_event: Utc::now(), + reporter: component.into(), + } + } +} + +impl Default for Diagnostics { + fn default() -> Self { + Self { + last_event: Utc::now(), + reporter: "sart".into(), + } + } +} + +impl Diagnostics { + fn recorder>( + &self, + client: Client, + res: T, + ) -> Recorder { + Recorder::new(client, self.reporter.clone(), res.object_ref(&())) + } +} + +#[tracing::instrument(skip_all)] +pub fn error_policy, E: TraceableError, C: Ctx>( + resource: Arc, + error: &E, + ctx: Arc, +) -> Action { + tracing::warn!("reconcile failed: {:?}", error); + let metrics = ctx.metrics(); + metrics.lock().unwrap().reconcile_failure(resource.as_ref()); + Action::requeue(Duration::from_secs(10)) +} + +impl Context { + pub fn test() -> (Arc, ApiServerVerifier, Registry) { + let (mock_service, handle) = tower_test::mock::pair::, Response>(); + let mock_client = Client::new(mock_service, "default"); + let registry = 
Registry::default(); + let ctx = Self { + client: mock_client, + metrics: Arc::new(Mutex::new(Metrics::default().register(®istry).unwrap())), + diagnostics: Arc::default(), + interval: 30, + }; + (Arc::new(ctx), ApiServerVerifier(handle), registry) + } +} + +impl ContextWith { + pub fn test(component: T) -> (Arc, ApiServerVerifier, Registry) { + let (mock_service, handle) = tower_test::mock::pair::, Response>(); + let mock_client = Client::new(mock_service, "default"); + let registry = Registry::default(); + let ctx = Context { + client: mock_client, + metrics: Arc::new(Mutex::new(Metrics::default().register(®istry).unwrap())), + diagnostics: Arc::default(), + interval: 30, + }; + let ctx_with = Self { + inner: ctx, + component, + }; + (Arc::new(ctx_with), ApiServerVerifier(handle), registry) + } +} diff --git a/sartd/src/kubernetes/src/controller/metrics.rs b/sartd/src/kubernetes/src/controller/metrics.rs new file mode 100644 index 0000000..7c6a4e4 --- /dev/null +++ b/sartd/src/kubernetes/src/controller/metrics.rs @@ -0,0 +1,181 @@ +use kube::Resource; +use prometheus::{histogram_opts, opts, HistogramVec, IntCounterVec}; +use prometheus::{IntGaugeVec, Registry}; +use tokio::time::Instant; + +#[derive(Debug, Clone)] +pub struct Metrics { + pub reconciliations: IntCounterVec, + pub failures: IntCounterVec, + pub reconcile_duration: HistogramVec, + pub max_blocks: IntGaugeVec, + pub allocated_blocks: IntGaugeVec, + pub bgp_advertisements: IntGaugeVec, + pub bgp_advertisement_status: IntGaugeVec, + pub bgp_advertisement_backoff: IntCounterVec, +} + +impl Default for Metrics { + fn default() -> Self { + let reconcile_duration = HistogramVec::new( + histogram_opts!( + "sart_controller_reconcile_duration_seconds", + "The duration of reconcile to complete in seconds" + ) + .buckets(vec![0.01, 0.1, 0.25, 0.5, 1., 5., 15., 60.]), + &[], + ) + .unwrap(); + let failures = IntCounterVec::new( + opts!( + "sart_controller_reconciliation_errors_total", + "Total count of reconciliation errors", + ), + &["resource", "instance"], + ) + .unwrap(); + let reconciliations = IntCounterVec::new( + opts!( + "sart_controller_reconciliation_total", + "Total count of reconciliations", + ), + &["resource", "instance"], + ) + .unwrap(); + let max_blocks = IntGaugeVec::new( + opts!( + "sart_controller_max_blocks", + "The number of maximum allocatable address blocks" + ), + &["pool", "type"], + ) + .unwrap(); + let allocated_blocks = IntGaugeVec::new( + opts!( + "sart_controller_allocated_blocks", + "The number of allocated address blocks" + ), + &["pool", "type"], + ) + .unwrap(); + let bgp_advertisements = IntGaugeVec::new( + opts!( + "sart_controller_bgp_advertisements", + "The number of BGP Advertisement" + ), + &["type"], + ) + .unwrap(); + let bgp_advertisement_status = IntGaugeVec::new( + opts!( + "sart_controller_bgp_advertisement_status", + "BGP Advertisement status" + ), + &["name", "status"], + ) + .unwrap(); + let bgp_advertisement_backoff = IntCounterVec::new( + opts!( + "sart_controller_bgp_advertisement_backoff_count", + "The number of back off count of BGP Advertisement " + ), + &["name"], + ) + .unwrap(); + + Metrics { + reconciliations, + failures, + reconcile_duration, + max_blocks, + allocated_blocks, + bgp_advertisements, + bgp_advertisement_status, + bgp_advertisement_backoff, + } + } +} + +impl Metrics { + pub fn register(self, registry: &Registry) -> Result { + registry.register(Box::new(self.reconciliations.clone()))?; + registry.register(Box::new(self.failures.clone()))?; + 
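// [Editor's note] A condensed sketch of how the reconcilers in this patch
// consume the shared handle created by `Metrics::default().register(&registry)`.
// The handle is an `Arc<Mutex<Metrics>>`; `ctx` and `ab` below stand in for a
// reconciler's context and resource and are illustrative only. The scoped
// block matters: a `std::sync::MutexGuard` is not `Send`, so the guard must be
// dropped before the reconciler awaits anything.
//
//     let metrics = ctx.metrics(); // Arc<Mutex<Metrics>>
//     {
//         let m = metrics.lock().map_err(|_| Error::FailedToGetLock)?;
//         m.reconciliation(ab.as_ref());
//         m.allocated_blocks_inc(&ab.spec.pool_ref, &format!("{}", ab.spec.r#type));
//     } // guard dropped here, before any .await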
registry.register(Box::new(self.reconcile_duration.clone()))?; + registry.register(Box::new(self.max_blocks.clone()))?; + registry.register(Box::new(self.allocated_blocks.clone()))?; + registry.register(Box::new(self.bgp_advertisements.clone()))?; + registry.register(Box::new(self.bgp_advertisement_status.clone()))?; + registry.register(Box::new(self.bgp_advertisement_backoff.clone()))?; + Ok(self) + } + + pub fn reconcile_failure>(&self, resource: &T) { + self.failures + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } + + pub fn reconciliation>(&self, resource: &T) { + self.reconciliations + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } + + pub fn max_blocks(&self, pool: &str, r#type: &str, val: i64) { + self.max_blocks.with_label_values(&[pool, r#type]).set(val) + } + + pub fn allocated_blocks_inc(&self, pool: &str, r#type: &str) { + self.allocated_blocks + .with_label_values(&[pool, r#type]) + .inc() + } + + pub fn allocated_blocks_dec(&self, pool: &str, r#type: &str) { + self.allocated_blocks + .with_label_values(&[pool, r#type]) + .dec() + } + + pub fn bgp_advertisements_inc(&self, r#type: &str) { + self.bgp_advertisements.with_label_values(&[r#type]).inc() + } + + pub fn bgp_advertisements_dec(&self, r#type: &str) { + self.bgp_advertisements.with_label_values(&[r#type]).dec() + } + + pub fn bgp_advertisements_set(&self, r#type: &str, val: i64) { + self.bgp_advertisements + .with_label_values(&[r#type]) + .set(val) + } + + pub fn bgp_advertisement_status_set(&self, name: &str, status: &str, val: i64) { + self.bgp_advertisement_status + .with_label_values(&[name, status]) + .set(val) + } +} + +/// Smart function duration measurer +/// +/// Relies on Drop to calculate duration and register the observation in the histogram +pub struct ReconcileMeasurer { + start: Instant, + metric: HistogramVec, +} + +impl Drop for ReconcileMeasurer { + fn drop(&mut self) { + #[allow(clippy::cast_precision_loss)] + let duration = self.start.elapsed().as_millis() as f64 / 1000.0; + self.metric.with_label_values(&[]).observe(duration); + } +} diff --git a/sartd/src/kubernetes/src/controller/reconciler/address_block.rs b/sartd/src/kubernetes/src/controller/reconciler/address_block.rs index 9ed1786..98e0eaf 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/address_block.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/address_block.rs @@ -1,4 +1,7 @@ -use std::{str::FromStr, sync::Arc}; +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; use futures::StreamExt; use ipnet::IpNet; @@ -16,8 +19,11 @@ use sartd_ipam::manager::{AllocatorSet, Block, BlockAllocator}; use tracing::{field, Span}; use crate::{ - context::{error_policy, ContextWith, Ctx, State}, - controller::error::Error, + controller::{ + context::{error_policy, ContextWith, Ctx, State}, + error::Error, + metrics::Metrics, + }, crd::{ address_block::{AddressBlock, ADDRESS_BLOCK_FINALIZER_CONTROLLER}, address_pool::AddressType, @@ -36,6 +42,11 @@ pub async fn reconciler( ctx: Arc>, ) -> Result { let address_blocks = Api::::all(ctx.client().clone()); + let metrics = ctx.inner.metrics(); + metrics + .lock() + .map_err(|_| Error::FailedToGetLock)? 
+ .reconciliation(ab.as_ref()); finalizer( &address_blocks, @@ -67,8 +78,8 @@ async fn reconcile( #[tracing::instrument(skip_all)] async fn reconcile_pod( _api: &Api, - _ab: &AddressBlock, - _ctx: Arc>, + ab: &AddressBlock, + ctx: Arc>, ) -> Result { Ok(Action::await_change()) } @@ -133,6 +144,11 @@ async fn reconcile_service( None => { let block = Block::new(ab.name_any(), ab.name_any(), cidr).map_err(Error::Ipam)?; alloc_set.blocks.insert(ab.name_any(), block); + + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .allocated_blocks_inc(&ab.spec.pool_ref, &format!("{}", ab.spec.r#type)); if ab.spec.auto_assign { match &alloc_set.auto_assign { Some(_a) => { @@ -179,6 +195,11 @@ async fn cleanup_pod( if let Some(pool) = block_allocator.get_mut(&ab.spec.pool_ref) { tracing::info!(pool=ab.spec.pool_ref, cidr=?pool.cidr, block_size=pool.block_size,"Remove the pool from the block allocator"); pool.release(index).map_err(Error::Ipam)?; + + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .allocated_blocks_dec(&ab.spec.pool_ref, &format!("{}", ab.spec.r#type)); } } Ok(Action::await_change()) @@ -218,12 +239,22 @@ async fn cleanup_service( if deletable { tracing::warn!(name = ab.name_any(), "delete block"); alloc_set.remove(&ab.name_any()); + + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? + .allocated_blocks_dec(&ab.spec.pool_ref, &format!("{}", ab.spec.r#type)); } Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64, ctx: ControllerAddressBlockContext) { +pub async fn run( + state: State, + interval: u64, + ctx: ControllerAddressBlockContext, + metrics: Arc>, +) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -242,7 +273,7 @@ pub async fn run(state: State, interval: u64, ctx: ControllerAddressBlockContext .run( reconciler, error_policy::>, - state.to_context_with::(client, interval, ctx), + state.to_context_with::(client, interval, ctx, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) @@ -258,8 +289,11 @@ mod tests { use sartd_ipam::manager::{AllocatorSet, Block, BlockAllocator}; use crate::{ - context::{ContextWith, Ctx}, - controller::{error::Error, reconciler::address_block::ControllerAddressBlockContext}, + controller::{ + context::{ContextWith, Ctx}, + error::Error, + reconciler::address_block::ControllerAddressBlockContext, + }, crd::address_block::{AddressBlock, AddressBlockSpec}, fixture::reconciler::{timeout_after_1s, ApiServerVerifier}, }; diff --git a/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs b/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs index 8bb4ce9..054694e 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs @@ -1,7 +1,7 @@ use std::{ net::{IpAddr, Ipv4Addr, Ipv6Addr}, str::FromStr, - sync::Arc, + sync::{Arc, Mutex}, }; use futures::StreamExt; @@ -22,8 +22,11 @@ use sartd_ipam::manager::{BlockAllocator, Pool}; use tracing::{field, Span}; use crate::{ - context::{error_policy, ContextWith, Ctx, State}, - controller::error::Error, + controller::{ + context::{error_policy, ContextWith, Ctx, State}, + error::Error, + metrics::Metrics, + }, crd::{ address_block::{AddressBlock, AddressBlockSpec}, address_pool::{AddressPool, AddressType, ADDRESS_POOL_ANNOTATION, ADDRESS_POOL_FINALIZER}, @@ -37,6 +40,11 @@ pub async fn reconciler( ctx: Arc>>, ) -> Result { let address_pools = 
Api::<AddressPool>::all(ctx.client().clone());
+    let metrics = ctx.inner.metrics();
+    metrics
+        .lock()
+        .map_err(|_| Error::FailedToGetLock)?
+        .reconciliation(ap.as_ref());
 
     finalizer(&address_pools, ADDRESS_POOL_FINALIZER, ap, |event| async {
         match event {
@@ -74,6 +82,11 @@ async fn reconcile_service_pool(
     let cidr = IpNet::from_str(&ap.spec.cidr).map_err(|_| Error::InvalidCIDR)?;
     let block_size = ap.spec.block_size.unwrap_or(cidr.prefix_len() as u32);
 
+    ctx.metrics()
+        .lock()
+        .map_err(|_| Error::FailedToGetLock)?
+        .max_blocks(&ap.name_any(), &format!("{}", AddressType::Service), 1);
+
     match address_blocks
         .get_opt(&ap.name_any())
         .await
@@ -132,6 +145,16 @@ async fn reconcile_pod_pool(
     tracing::info!(name = ap.name_any(), "Reconcile AddressPool");
     let cidr = IpNet::from_str(&ap.spec.cidr).map_err(|_| Error::InvalidCIDR)?;
     let block_size = ap.spec.block_size.unwrap_or(cidr.prefix_len() as u32);
+    let max_blocks = 1u32 << (block_size - cidr.prefix_len() as u32);
+    ctx.metrics()
+        .lock()
+        .map_err(|_| Error::FailedToGetLock)?
+        .max_blocks(
+            &ap.name_any(),
+            &format!("{}", AddressType::Pod),
+            max_blocks as i64,
+        );
+
     {
         let tmp = ctx.component.clone();
         let mut block_allocator = tmp.inner.lock().map_err(|_| Error::FailedToGetLock)?;
@@ -159,14 +182,19 @@ async fn cleanup(
         .list(&list_params)
         .await
         .map_err(Error::Kube)?;
-    if ab_list.items.len() != 0 {
+    if !ab_list.items.is_empty() {
         return Err(Error::AddressPoolNotEmpty);
     }
 
     Ok(Action::await_change())
 }
 
-pub async fn run(state: State, interval: u64, block_allocator: Arc<BlockAllocator>) {
+pub async fn run(
+    state: State,
+    interval: u64,
+    block_allocator: Arc<BlockAllocator>,
+    metrics: Arc<Mutex<Metrics>>,
+) {
     let client = Client::try_default()
         .await
         .expect("Failed to create kube client");
@@ -185,7 +213,7 @@ pub async fn run(state: State, interval: u64, block_allocator: Arc<BlockAllocator>) {
         .run(
             reconciler,
             error_policy::<AddressPool, Error, ContextWith<Arc<BlockAllocator>>>,
-            state.to_context_with(client, interval, block_allocator),
+            state.to_context_with(client, interval, block_allocator, metrics),
         )
         .filter_map(|x| async move { std::result::Result::ok(x) })
         .for_each(|_| futures::future::ready(()))
diff --git a/sartd/src/kubernetes/src/controller/reconciler/bgp_advertisement.rs b/sartd/src/kubernetes/src/controller/reconciler/bgp_advertisement.rs
index 84fbb35..d7bb518 100644
--- a/sartd/src/kubernetes/src/controller/reconciler/bgp_advertisement.rs
+++ b/sartd/src/kubernetes/src/controller/reconciler/bgp_advertisement.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use futures::StreamExt;
 use kube::{
@@ -14,8 +14,11 @@ use kube::{
 use tracing::{field, Span};
 
 use crate::{
-    context::{error_policy, Context, State},
-    controller::error::Error,
+    controller::{
+        context::{error_policy, Context, Ctx, State},
+        error::Error,
+        metrics::Metrics,
+    },
     crd::bgp_advertisement::{AdvertiseStatus, BGPAdvertisement, BGP_ADVERTISEMENT_FINALIZER},
     util::get_namespace,
 };
@@ -24,6 +27,11 @@ use crate::{
 pub async fn reconciler(ba: Arc<BGPAdvertisement>, ctx: Arc<Context>) -> Result<Action, Error> {
     let ns = get_namespace::<BGPAdvertisement>(&ba).map_err(Error::KubeLibrary)?;
     let bgp_advertisements = Api::<BGPAdvertisement>::namespaced(ctx.client.clone(), &ns);
+    let metrics = ctx.metrics();
+    metrics
+        .lock()
+        .map_err(|_| Error::FailedToGetLock)?
+        .reconciliation(ba.as_ref());
 
     finalizer(
         &bgp_advertisements,
@@ -42,10 +50,58 @@ pub async fn reconciler(ba: Arc<BGPAdvertisement>, ctx: Arc<Context>) -> Result<Action, Error> {
 
 #[tracing::instrument(skip_all, fields(trace_id))]
 async fn reconcile(
-    _api: &Api<BGPAdvertisement>,
+    api: &Api<BGPAdvertisement>,
     ba: &BGPAdvertisement,
-    _ctx: Arc<Context>,
+    ctx: Arc<Context>,
 ) -> Result<Action, Error> {
+    let mut advertised = 0;
+    let mut not_advertised = 0;
+    let mut withdraw = 0;
+    if let Some(status) = ba.status.as_ref() {
+        if let Some(peers) = status.peers.as_ref() {
+            for (_peer, adv_status) in peers.iter() {
+                match adv_status {
+                    AdvertiseStatus::Advertised => advertised += 1,
+                    AdvertiseStatus::NotAdvertised => not_advertised += 1,
+                    AdvertiseStatus::Withdraw => withdraw += 1,
+                }
+            }
+        }
+    }
+    {
+        let metrics = ctx.metrics();
+        let metrics = metrics.lock().map_err(|_| Error::FailedToGetLock)?;
+        metrics.bgp_advertisement_status_set(
+            &ba.name_any(),
+            &format!("{}", AdvertiseStatus::Advertised),
+            advertised,
+        );
+        metrics.bgp_advertisement_status_set(
+            &ba.name_any(),
+            &format!("{}", AdvertiseStatus::NotAdvertised),
+            not_advertised,
+        );
+        metrics.bgp_advertisement_status_set(
+            &ba.name_any(),
+            &format!("{}", AdvertiseStatus::Withdraw),
+            withdraw,
+        );
+    }
+
+    let ba_list = api
+        .list(&ListParams::default())
+        .await
+        .map_err(Error::Kube)?;
+    let mut counter = 0;
+    for b in ba_list.iter() {
+        if b.spec.r#type.eq(&ba.spec.r#type) {
+            counter += 1;
+        }
+    }
+    ctx.metrics()
+        .lock()
+        .map_err(|_| Error::FailedToGetLock)?
+        .bgp_advertisements_set(&format!("{}", ba.spec.r#type), counter as i64);
 
     Ok(Action::await_change())
 }
@@ -53,7 +109,7 @@ async fn reconcile(
 async fn cleanup(
     api: &Api<BGPAdvertisement>,
     ba: &BGPAdvertisement,
-    _ctx: Arc<Context>,
+    ctx: Arc<Context>,
 ) -> Result<Action, Error> {
     let trace_id = sartd_trace::telemetry::get_trace_id();
     Span::current().record("trace_id", &field::display(&trace_id));
@@ -73,6 +129,10 @@ async fn cleanup(
                 namespace = ns,
                 "successfully delete BGPAdvertisement"
             );
+            ctx.metrics()
+                .lock()
+                .map_err(|_| Error::FailedToGetLock)?
+                .bgp_advertisements_dec(&format!("{}", ba.spec.r#type));
             return Ok(Action::await_change());
         }
         for (_p, s) in peers.iter_mut() {
@@ -87,6 +147,10 @@ async fn cleanup(
                 namespace = ns,
                 "successfully delete BGPAdvertisement"
             );
+            ctx.metrics()
+                .lock()
+                .map_err(|_| Error::FailedToGetLock)?
+ .bgp_advertisements_dec(&format!("{}", ba.spec.r#type)); return Ok(Action::await_change()); } @@ -112,7 +176,7 @@ async fn cleanup( } #[tracing::instrument(skip_all, fields(trace_id))] -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -139,7 +203,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) @@ -158,8 +222,7 @@ mod tests { use kube::core::ObjectMeta; use crate::{ - context::Context, - controller::error::Error, + controller::{context::Context, error::Error}, crd::{ address_pool::AddressType, bgp_advertisement::{ diff --git a/sartd/src/kubernetes/src/controller/reconciler/block_request.rs b/sartd/src/kubernetes/src/controller/reconciler/block_request.rs index f361c0b..ad657d1 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/block_request.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/block_request.rs @@ -2,7 +2,7 @@ use std::{ collections::BTreeMap, net::{Ipv4Addr, Ipv6Addr}, str::FromStr, - sync::Arc, + sync::{Arc, Mutex}, }; use futures::StreamExt; @@ -20,8 +20,11 @@ use kube::{ use sartd_ipam::manager::BlockAllocator; use crate::{ - context::{error_policy, ContextWith, Ctx, State}, - controller::error::Error, + controller::{ + context::{error_policy, ContextWith, Ctx, State}, + error::Error, + metrics::Metrics, + }, crd::{ address_block::{AddressBlock, AddressBlockSpec, AddressBlockStatus}, address_pool::{AddressPool, AddressType, ADDRESS_POOL_ANNOTATION}, @@ -35,6 +38,11 @@ pub async fn reconciler( ctx: Arc>>, ) -> Result { let block_request_api = Api::::all(ctx.client().clone()); + let metrics = ctx.inner.metrics(); + metrics + .lock() + .map_err(|_| Error::FailedToGetLock)? + .reconciliation(br.as_ref()); finalizer( &block_request_api, @@ -153,6 +161,11 @@ async fn reconcile( .patch_status(&ab.name_any(), &ssapply, &ab_patch) .await .map_err(Error::Kube)?; + + ctx.metrics() + .lock() + .map_err(|_| Error::FailedToGetLock)? 
+ .allocated_blocks_inc(&ab.spec.pool_ref, &format!("{}", ab.spec.r#type)); } } @@ -169,7 +182,12 @@ async fn cleanup( Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64, block_allocator: Arc) { +pub async fn run( + state: State, + interval: u64, + block_allocator: Arc, + metrics: Arc>, +) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -191,7 +209,7 @@ pub async fn run(state: State, interval: u64, block_allocator: Arc>>, - state.to_context_with(client, interval, block_allocator), + state.to_context_with(client, interval, block_allocator, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/controller/reconciler/cluster_bgp.rs b/sartd/src/kubernetes/src/controller/reconciler/cluster_bgp.rs index 0a6ba37..1e8a346 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/cluster_bgp.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/cluster_bgp.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, net::Ipv4Addr, sync::Arc, time::Duration}; +use std::{collections::BTreeMap, net::Ipv4Addr, sync::{Arc, Mutex}, time::Duration}; use futures::StreamExt; use k8s_openapi::api::core::v1::Node; @@ -15,8 +15,11 @@ use kube::{ use tracing::{field, Span}; use crate::{ - context::{error_policy, Context, State}, - controller::error::Error, + controller::{ + context::{error_policy, Context, State, Ctx}, + error::Error, + metrics::Metrics, + }, crd::{ bgp_peer::{BGPPeerSlim, PeerConfig}, bgp_peer_template::BGPPeerTemplate, @@ -32,6 +35,8 @@ use crate::{ #[tracing::instrument(skip_all, fields(trace_id))] pub async fn reconciler(cb: Arc, ctx: Arc) -> Result { let cluster_bgps = Api::::all(ctx.client.clone()); + let metrics = ctx.metrics(); + metrics.lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(cb.as_ref()); finalizer(&cluster_bgps, CLUSTER_BGP_FINALIZER, cb, |event| async { match event { @@ -282,7 +287,7 @@ async fn cleanup(cb: &ClusterBGP, _ctx: Arc) -> Result { Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -301,7 +306,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/controller/reconciler/endpointslice_watcher.rs b/sartd/src/kubernetes/src/controller/reconciler/endpointslice_watcher.rs index 411b5e9..2b448f3 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/endpointslice_watcher.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/endpointslice_watcher.rs @@ -1,7 +1,7 @@ use std::{ collections::{BTreeMap, HashMap}, net::IpAddr, - sync::Arc, + sync::{Arc, Mutex}, time::Duration, }; @@ -22,9 +22,10 @@ use kube::{ use tracing::{field, Span}; use crate::{ - context::{error_policy, Context, Ctx, State}, controller::{ + context::{error_policy, Context, Ctx, State}, error::Error, + metrics::Metrics, reconciler::service_watcher::{get_allocated_lb_addrs, is_loadbalancer}, }, crd::{ @@ -46,6 +47,9 @@ pub const ENDPOINTSLICE_TRIGGER: &str = "endpointslice.sart.terassyi.net/trigger #[tracing::instrument(skip_all, fields(trace_id))] pub async fn reconciler(eps: Arc, ctx: Arc) -> Result { + let 
metrics = ctx.metrics(); + metrics.lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(eps.as_ref()); + let ns = get_namespace::(&eps).map_err(Error::KubeLibrary)?; let endpointslices = Api::::namespaced(ctx.client().clone(), &ns); @@ -253,12 +257,10 @@ async fn reconcile(eps: &EndpointSlice, ctx: Arc) -> Result) -> Result { - let ns = get_namespace::(eps).map_err(Error::KubeLibrary)?; - Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64) { +pub async fn run(state: State, interval: u64, metrics: Arc>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -272,7 +274,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/controller/reconciler/node_watcher.rs b/sartd/src/kubernetes/src/controller/reconciler/node_watcher.rs index 56bfca7..98f7a56 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/node_watcher.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/node_watcher.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use futures::StreamExt; use k8s_openapi::api::core::v1::Node; @@ -15,8 +15,11 @@ use kube::{ use tracing::{field, Span}; use crate::{ - context::{error_policy, Context, State}, - controller::error::Error, + controller::{ + context::{error_policy, Context, State, Ctx}, + error::Error, + metrics::Metrics, + }, crd::{ cluster_bgp::{ClusterBGP, ClusterBGPStatus}, node_bgp::NodeBGP, @@ -28,6 +31,9 @@ pub const NODE_FINALIZER: &str = "node.sart.terassyi.net/finalizer"; #[tracing::instrument(skip_all, fields(trace_id))] pub async fn reconciler(node: Arc, ctx: Arc) -> Result { let nodes = Api::::all(ctx.client.clone()); + let metrics = ctx.metrics(); + metrics.lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(node.as_ref()); + finalizer(&nodes, NODE_FINALIZER, node, |event| async { match event { Event::Apply(node) => reconcile(&nodes, &node, ctx.clone()).await, @@ -106,7 +112,7 @@ async fn cleanup(_api: &Api, node: &Node, ctx: Arc) -> Result>) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -118,7 +124,7 @@ pub async fn run(state: State, interval: u64) { .run( reconciler, error_policy::, - state.to_context(client, interval), + state.to_context(client, interval, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/controller/reconciler/service_watcher.rs b/sartd/src/kubernetes/src/controller/reconciler/service_watcher.rs index bb52cd3..731e940 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/service_watcher.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/service_watcher.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, net::IpAddr, str::FromStr, sync::Arc}; +use std::{collections::HashMap, net::IpAddr, str::FromStr, sync::{Arc, Mutex}}; use futures::StreamExt; use k8s_openapi::api::{ @@ -18,8 +18,11 @@ use kube::{ use tracing::{field, Span}; use crate::{ - context::{error_policy, ContextWith, Ctx, State}, - controller::error::Error, + controller::{ + context::{error_policy, ContextWith, Ctx, State}, + error::Error, + metrics::Metrics, + }, crd::address_pool::{ADDRESS_POOL_ANNOTATION, LOADBALANCER_ADDRESS_ANNOTATION}, util::{diff, get_namespace}, }; @@ -40,6 +43,9 @@ pub 
async fn reconciler( ) -> Result { let ns = get_namespace::(&svc).map_err(Error::KubeLibrary)?; + let metrics = ctx.inner.metrics(); + metrics.lock().map_err(|_| Error::FailedToGetLock)?.reconciliation(svc.as_ref()); + let services = Api::::namespaced(ctx.client().clone(), &ns); finalizer(&services, SERVICE_FINALIZER, svc, |event| async { @@ -318,7 +324,12 @@ async fn cleanup( Ok(Action::await_change()) } -pub async fn run(state: State, interval: u64, allocator_set: Arc) { +pub async fn run( + state: State, + interval: u64, + allocator_set: Arc, + metrics: Arc>, +) { let client = Client::try_default() .await .expect("Failed to create kube client"); @@ -332,7 +343,7 @@ pub async fn run(state: State, interval: u64, allocator_set: Arc) .run( reconciler, error_policy::>>, - state.to_context_with(client, interval, allocator_set), + state.to_context_with(client, interval, allocator_set, metrics), ) .filter_map(|x| async move { std::result::Result::ok(x) }) .for_each(|_| futures::future::ready(())) diff --git a/sartd/src/kubernetes/src/controller/server.rs b/sartd/src/kubernetes/src/controller/server.rs index 8bdb33f..11bc034 100644 --- a/sartd/src/kubernetes/src/controller/server.rs +++ b/sartd/src/kubernetes/src/controller/server.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, str::FromStr, sync::Arc}; +use std::{collections::HashMap, str::FromStr, sync::{Arc, Mutex}}; use actix_web::{ get, middleware, post, @@ -15,10 +15,14 @@ use sartd_ipam::manager::{AllocatorSet, BlockAllocator, Pool}; use sartd_trace::init::{prepare_tracing, TraceConfig}; use crate::{ - config::Mode, context::State, controller::reconciler::address_block::ControllerAddressBlockContext, crd::{ + config::Mode, + controller::{ + context::State, metrics::Metrics, reconciler::address_block::ControllerAddressBlockContext, + }, + crd::{ address_block::AddressBlock, address_pool::AddressPool, bgp_advertisement::BGPAdvertisement, bgp_peer::BGPPeer, - } + }, }; use super::{error::Error, reconciler, webhook}; @@ -90,11 +94,14 @@ async fn run(c: Controller, trace_config: TraceConfig) { let allocator_set = Arc::new(AllocatorSet::new()); + let metrics = Arc::new(Mutex::new(Metrics::default().register(&state.registry).unwrap())); + // Start reconcilers tracing::info!("Start ClusterBGP reconciler"); let cluster_bgp_state = state.clone(); + let cb_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::cluster_bgp::run(cluster_bgp_state, c.requeue_interval).await; + reconciler::cluster_bgp::run(cluster_bgp_state, c.requeue_interval, cb_metrics).await; }); let client = Client::try_default() @@ -110,12 +117,14 @@ async fn run(c: Controller, trace_config: TraceConfig) { tracing::info!("Start AddressPool reconciler"); let address_pool_state = state.clone(); + let ap_metrics = metrics.clone(); let block_allocator_cloned = block_allocator.clone(); tokio::spawn(async move { reconciler::address_pool::run( address_pool_state, c.requeue_interval, block_allocator_cloned, + ap_metrics, ) .await; }); @@ -124,23 +133,26 @@ async fn run(c: Controller, trace_config: TraceConfig) { let address_block_state = state.clone(); let ab_allocator_set = allocator_set.clone(); let ab_block_allocator = block_allocator.clone(); - let ab_ctx = ControllerAddressBlockContext{ + let ab_ctx = ControllerAddressBlockContext { allocator_set: ab_allocator_set, block_allocator: ab_block_allocator, }; + let ab_metrics = metrics.clone(); tokio::spawn(async move { - reconciler::address_block::run(address_block_state, c.requeue_interval, ab_ctx) + 
reconciler::address_block::run(address_block_state, c.requeue_interval, ab_ctx, ab_metrics)
            .await;
    });
 
    if c.mode.eq(&Mode::CNI) || c.mode.eq(&Mode::Dual) {
        tracing::info!("Start BlockRequest reconciler");
        let block_request_state = state.clone();
+        let br_metrics = metrics.clone();
        tokio::spawn(async move {
            reconciler::block_request::run(
                block_request_state,
                c.requeue_interval,
                block_allocator.clone(),
+                br_metrics,
            )
            .await;
        });
@@ -148,30 +160,45 @@ async fn run(c: Controller, trace_config: TraceConfig) {
 
    tracing::info!("Start Node watcher");
    let node_state = state.clone();
+    let nw_metrics = metrics.clone();
    tokio::spawn(async move {
-        reconciler::node_watcher::run(node_state, c.requeue_interval).await;
+        reconciler::node_watcher::run(node_state, c.requeue_interval, nw_metrics).await;
    });
 
    if c.mode.eq(&Mode::LB) || c.mode.eq(&Mode::Dual) {
        tracing::info!("Start Service watcher");
        let service_state = state.clone();
        let svc_allocator_set = allocator_set.clone();
+        let svc_metrics = metrics.clone();
        tokio::spawn(async move {
-            reconciler::service_watcher::run(service_state, c.requeue_interval, svc_allocator_set)
-                .await;
+            reconciler::service_watcher::run(
+                service_state,
+                c.requeue_interval,
+                svc_allocator_set,
+                svc_metrics,
+            )
+            .await;
        });
 
        tracing::info!("Start Endpointslice watcher");
        let endpointslice_state = state.clone();
+        let eps_metrics = metrics.clone();
        tokio::spawn(async move {
-            reconciler::endpointslice_watcher::run(endpointslice_state, c.requeue_interval).await;
+            reconciler::endpointslice_watcher::run(
+                endpointslice_state,
+                c.requeue_interval,
+                eps_metrics,
+            )
+            .await;
        });
    }
 
    tracing::info!("Start BGPAdvertisement reconciler");
    let bgp_advertisement_state = state.clone();
+    let ba_metrics = metrics.clone();
    tokio::spawn(async move {
-        reconciler::bgp_advertisement::run(bgp_advertisement_state, c.requeue_interval).await;
+        reconciler::bgp_advertisement::run(bgp_advertisement_state, c.requeue_interval, ba_metrics)
+            .await;
    });
 
    server.run().await.unwrap()
@@ -215,6 +242,7 @@ async fn metrics_(c: Data<State>, _req: HttpRequest) -> impl Responder {
    let encoder = TextEncoder::new();
    let mut buffer = vec![];
    encoder.encode(&metrics, &mut buffer).unwrap();
+    tracing::debug!(bytes = buffer.len(), "encoded metrics for scrape");
    HttpResponse::Ok().body(buffer)
}
 
diff --git a/sartd/src/kubernetes/src/crd/bgp_advertisement.rs b/sartd/src/kubernetes/src/crd/bgp_advertisement.rs
index 9793137..87f9f24 100644
--- a/sartd/src/kubernetes/src/crd/bgp_advertisement.rs
+++ b/sartd/src/kubernetes/src/crd/bgp_advertisement.rs
@@ -45,6 +45,16 @@ pub enum AdvertiseStatus {
     Withdraw,
 }
 
+impl std::fmt::Display for AdvertiseStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            AdvertiseStatus::Advertised => write!(f, "advertised"),
+            AdvertiseStatus::NotAdvertised => write!(f, "notadvertised"),
+            AdvertiseStatus::Withdraw => write!(f, "withdraw"),
+        }
+    }
+}
+
 #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub enum Protocol {
diff --git a/sartd/src/kubernetes/src/crd/bgp_peer.rs b/sartd/src/kubernetes/src/crd/bgp_peer.rs
index dea5b31..a47feab 100644
--- a/sartd/src/kubernetes/src/crd/bgp_peer.rs
+++ b/sartd/src/kubernetes/src/crd/bgp_peer.rs
@@ -1,3 +1,4 @@
+use core::fmt;
 use std::collections::BTreeMap;
 
 use kube::core::ObjectMeta;
@@ -70,6 +71,20 @@ pub enum BGPPeerConditionStatus {
     Established = 6,
 }
 
+impl fmt::Display for BGPPeerConditionStatus {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
Self::Unknown => write!(f, "unknown"), + Self::Idle => write!(f, "idle"), + Self::Active => write!(f, "active"), + Self::Connect => write!(f, "connect"), + Self::OpenSent => write!(f, "opensent"), + Self::OpenConfirm => write!(f, "openconfirm"), + Self::Established => write!(f, "established"), + } + } +} + #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct BGPPeerSlim { diff --git a/sartd/src/kubernetes/src/crd/node_bgp.rs b/sartd/src/kubernetes/src/crd/node_bgp.rs index fde0d56..9d051f7 100644 --- a/sartd/src/kubernetes/src/crd/node_bgp.rs +++ b/sartd/src/kubernetes/src/crd/node_bgp.rs @@ -1,3 +1,5 @@ +use core::fmt; + use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -48,6 +50,15 @@ pub enum NodeBGPConditionStatus { Unavailable, } +impl std::fmt::Display for NodeBGPConditionStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Available => write!(f, "available"), + Self::Unavailable => write!(f, "unavailable"), + } + } +} + #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema, PartialEq, Eq)] pub enum NodeBGPConditionReason { #[default] diff --git a/sartd/src/kubernetes/src/fixture.rs b/sartd/src/kubernetes/src/fixture.rs index 7baf597..c9fd43e 100644 --- a/sartd/src/kubernetes/src/fixture.rs +++ b/sartd/src/kubernetes/src/fixture.rs @@ -1,6 +1,6 @@ // #[cfg(test)] pub mod reconciler { - use std::{collections::BTreeMap, sync::Arc}; + use std::{collections::BTreeMap, sync::{Arc, Mutex}}; use http::{Request, Response}; use hyper::Body; @@ -20,7 +20,6 @@ pub mod reconciler { Client, Resource, ResourceExt, }; use prometheus::Registry; - use sartd_trace::metrics::Metrics; use serde::Serialize; use crate::{ @@ -29,7 +28,10 @@ pub mod reconciler { endpointslice_watcher::ENDPOINTSLICE_FINALIZER, service_watcher::SERVICE_FINALIZER, }, crd::{ - address_block::{AddressBlock, AddressBlockSpec, ADDRESS_BLOCK_FINALIZER_AGENT, ADDRESS_BLOCK_FINALIZER_CONTROLLER}, + address_block::{ + AddressBlock, AddressBlockSpec, ADDRESS_BLOCK_FINALIZER_AGENT, + ADDRESS_BLOCK_FINALIZER_CONTROLLER, + }, address_pool::{ AddressPool, AddressPoolSpec, AddressType, AllocationType, ADDRESS_POOL_FINALIZER, }, @@ -45,6 +47,7 @@ pub mod reconciler { }, node_bgp::{NodeBGP, NodeBGPSpec}, }, + metrics::Metrics, }; pub type ApiServerHandle = tower_test::mock::Handle, Response>; @@ -64,7 +67,7 @@ pub mod reconciler { let registry = Registry::default(); let ctx = Self { client: mock_client, - metrics: Metrics::default().register(®istry).unwrap(), + metrics: Arc::new(Mutex::new(Metrics::default().register(®istry).unwrap())), diagnostics: Arc::default(), interval: 30, }; @@ -79,7 +82,7 @@ pub mod reconciler { let registry = Registry::default(); let ctx = Context { client: mock_client, - metrics: Metrics::default().register(®istry).unwrap(), + metrics: Arc::new(Mutex::new(Metrics::default().register(®istry).unwrap())), diagnostics: Arc::default(), interval: 30, }; @@ -287,7 +290,10 @@ pub mod reconciler { pub fn test_address_block_lb() -> AddressBlock { AddressBlock { metadata: ObjectMeta { - finalizers: Some(vec![ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), ADDRESS_BLOCK_FINALIZER_AGENT.to_string()]), + finalizers: Some(vec![ + ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), + ADDRESS_BLOCK_FINALIZER_AGENT.to_string(), + ]), name: Some("test-pool".to_string()), ..Default::default() }, @@ -305,7 +311,10 @@ pub mod reconciler { pub fn test_address_block_lb_non_default() -> 
AddressBlock {
        AddressBlock {
            metadata: ObjectMeta {
-                finalizers: Some(vec![ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), ADDRESS_BLOCK_FINALIZER_AGENT.to_string()]),
+                finalizers: Some(vec![
+                    ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(),
+                    ADDRESS_BLOCK_FINALIZER_AGENT.to_string(),
+                ]),
                 name: Some("test-pool-non-default".to_string()),
                 ..Default::default()
             },
@@ -359,7 +368,10 @@ pub mod reconciler {
     pub fn test_address_block_pod() -> AddressBlock {
         AddressBlock {
             metadata: ObjectMeta {
-                finalizers: Some(vec![ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), ADDRESS_BLOCK_FINALIZER_AGENT.to_string()]),
+                finalizers: Some(vec![
+                    ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(),
+                    ADDRESS_BLOCK_FINALIZER_AGENT.to_string(),
+                ]),
                 name: Some("test-pool-sart-integration-control-plane-10.0.0.0".to_string()),
                 ..Default::default()
             },
@@ -377,7 +389,10 @@ pub mod reconciler {
     pub fn test_address_block_pod2() -> AddressBlock {
         AddressBlock {
             metadata: ObjectMeta {
-                finalizers: Some(vec![ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), ADDRESS_BLOCK_FINALIZER_AGENT.to_string()]),
+                finalizers: Some(vec![
+                    ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(),
+                    ADDRESS_BLOCK_FINALIZER_AGENT.to_string(),
+                ]),
                 name: Some("test-pool-sart-integration-control-plane-10.0.0.32".to_string()),
                 ..Default::default()
             },
@@ -395,7 +410,10 @@ pub mod reconciler {
     pub fn test_address_block_pod_non_default() -> AddressBlock {
         AddressBlock {
             metadata: ObjectMeta {
-                finalizers: Some(vec![ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(), ADDRESS_BLOCK_FINALIZER_AGENT.to_string()]),
+                finalizers: Some(vec![
+                    ADDRESS_BLOCK_FINALIZER_CONTROLLER.to_string(),
+                    ADDRESS_BLOCK_FINALIZER_AGENT.to_string(),
+                ]),
                 name: Some("test-pool-non-default-sart-integration-10.1.0.0".to_string()),
                 ..Default::default()
             },
diff --git a/sartd/src/kubernetes/src/lib.rs b/sartd/src/kubernetes/src/lib.rs
index 32831b3..6d4ec21 100644
--- a/sartd/src/kubernetes/src/lib.rs
+++ b/sartd/src/kubernetes/src/lib.rs
@@ -5,4 +5,5 @@ pub mod controller;
 pub mod crd;
 pub mod error;
 pub mod fixture;
+pub mod metrics;
 pub mod util;
diff --git a/sartd/src/kubernetes/src/metrics.rs b/sartd/src/kubernetes/src/metrics.rs
new file mode 100644
index 0000000..44a077d
--- /dev/null
+++ b/sartd/src/kubernetes/src/metrics.rs
@@ -0,0 +1,89 @@
+use kube::Resource;
+use prometheus::Registry;
+use prometheus::{histogram_opts, opts, HistogramVec, IntCounterVec};
+use tokio::time::Instant;
+
+#[derive(Debug, Clone)]
+pub struct Metrics {
+    pub reconciliations: IntCounterVec,
+    pub failures: IntCounterVec,
+    pub reconcile_duration: HistogramVec,
+}
+
+impl Default for Metrics {
+    fn default() -> Self {
+        let reconcile_duration = HistogramVec::new(
+            histogram_opts!(
+                "sart_controller_reconcile_duration_seconds",
+                "The duration of reconcile to complete in seconds"
+            )
+            .buckets(vec![0.01, 0.1, 0.25, 0.5, 1., 5., 15., 60.]),
+            &[],
+        )
+        .unwrap();
+        let failures = IntCounterVec::new(
+            opts!(
+                "sart_controller_reconciliation_errors_total",
+                "Total count of reconciliation errors",
+            ),
+            &["resource", "instance"],
+        )
+        .unwrap();
+        let reconciliations = IntCounterVec::new(
+            opts!(
+                "sart_controller_reconciliation_total",
+                "Total count of reconciliations",
+            ),
+            &["resource", "instance"],
+        )
+        .unwrap();
+        Metrics {
+            reconciliations,
+            failures,
+            reconcile_duration,
+        }
+    }
+}
+
+impl Metrics {
+    pub fn register(self, registry: &Registry) -> Result<Self, prometheus::Error> {
+        registry.register(Box::new(self.reconciliations.clone()))?;
+        registry.register(Box::new(self.failures.clone()))?;
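// [Editor's note] `ReconcileMeasurer` below is carried over with a `Drop` impl
// but no constructor appears in this patch. A minimal sketch of how such a
// Drop-based timer is typically started; `count_and_measure` is a hypothetical
// name, not part of this change:
//
//     impl Metrics {
//         pub fn count_and_measure(&self) -> ReconcileMeasurer {
//             ReconcileMeasurer {
//                 start: Instant::now(),
//                 metric: self.reconcile_duration.clone(),
//             }
//         }
//     }
//
// Binding the return value (`let _timer = metrics.count_and_measure();`) records
// one observation in `sart_controller_reconcile_duration_seconds` when the guard
// drops, covering every exit path of the reconciler, including `?` returns.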
registry.register(Box::new(self.reconcile_duration.clone()))?; + Ok(self) + } + + pub fn reconcile_failure>(&self, resource: &T) { + self.failures + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } + + pub fn reconciliation>(&self, resource: &T) { + self.reconciliations + .with_label_values(&[ + &resource.object_ref(&()).kind.unwrap(), + &resource.object_ref(&()).name.unwrap(), + ]) + .inc() + } +} + +/// Smart function duration measurer +/// +/// Relies on Drop to calculate duration and register the observation in the histogram +pub struct ReconcileMeasurer { + start: Instant, + metric: HistogramVec, +} + +impl Drop for ReconcileMeasurer { + fn drop(&mut self) { + #[allow(clippy::cast_precision_loss)] + let duration = self.start.elapsed().as_millis() as f64 / 1000.0; + self.metric.with_label_values(&[]).observe(duration); + } +} diff --git a/sartd/src/kubernetes/tests/agent_address_block_test.rs b/sartd/src/kubernetes/tests/agent_address_block_test.rs index 34913b2..fc487f9 100644 --- a/sartd/src/kubernetes/tests/agent_address_block_test.rs +++ b/sartd/src/kubernetes/tests/agent_address_block_test.rs @@ -1,4 +1,4 @@ -use std::{net::IpAddr, str::FromStr, sync::Arc}; +use std::{net::IpAddr, str::FromStr, sync::{Arc, Mutex}}; use kube::{ api::{DeleteParams, Patch, PatchParams}, @@ -6,8 +6,12 @@ use kube::{ }; use sartd_ipam::manager::AllocatorSet; use sartd_kubernetes::{ - agent::{self, reconciler::address_block::PodAllocator}, - context::{Ctx, State}, + agent::{ + self, + context::{Ctx, State}, + metrics::Metrics, + reconciler::address_block::PodAllocator, + }, crd::{address_block::AddressBlock, bgp_advertisement::BGPAdvertisement}, fixture::{ reconciler::{test_address_block_pod, test_address_block_pod2}, @@ -39,7 +43,12 @@ async fn integration_test_address_block() { notifier: sender, }); - let ctx = State::default().to_context_with(client.clone(), 30, pod_allocator.clone()); + let ctx = State::default().to_context_with( + client.clone(), + 30, + pod_allocator.clone(), + Arc::new(Mutex::new(Metrics::default())), + ); tracing::info!("Creating an AddressBlock resource"); let ab = test_address_block_pod(); diff --git a/sartd/src/kubernetes/tests/agent_bgp_advertisement_test.rs b/sartd/src/kubernetes/tests/agent_bgp_advertisement_test.rs index 32cfbc8..d09e9f0 100644 --- a/sartd/src/kubernetes/tests/agent_bgp_advertisement_test.rs +++ b/sartd/src/kubernetes/tests/agent_bgp_advertisement_test.rs @@ -5,8 +5,7 @@ use kube::{ Api, Client, ResourceExt, }; use sartd_kubernetes::{ - agent::{self, reconciler::node_bgp::ENV_HOSTNAME}, - context::State, + agent::{self, context::State, metrics::Metrics, reconciler::node_bgp::ENV_HOSTNAME}, crd::{ bgp_advertisement::{AdvertiseStatus, BGPAdvertisement}, bgp_peer::{BGPPeer, BGPPeerCondition, BGPPeerConditionStatus, BGPPeerStatus}, @@ -46,7 +45,7 @@ async fn integration_test_agent_bgp_advertisement() { tracing::info!("Getting kube client"); let client = Client::try_default().await.unwrap(); - let ctx = State::default().to_context(client.clone(), 30); + let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default()))); tracing::info!("Creating NodeBGP"); let nb = test_node_bgp(); diff --git a/sartd/src/kubernetes/tests/agent_bgp_peer_test.rs b/sartd/src/kubernetes/tests/agent_bgp_peer_test.rs index c98ea90..e6cd0fa 100644 --- a/sartd/src/kubernetes/tests/agent_bgp_peer_test.rs +++ b/sartd/src/kubernetes/tests/agent_bgp_peer_test.rs @@ -9,8 +9,7 @@ use 
kube::{
     Api, Client, ResourceExt,
 };
 use sartd_kubernetes::{
-    agent,
-    context::State,
+    agent::{self, context::State, metrics::Metrics},
     crd::{
         bgp_peer::{BGPPeer, BGPPeerConditionStatus},
         node_bgp::NodeBGP,
@@ -43,7 +42,7 @@ async fn integration_test_agent_bgp_peer() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     tracing::info!("Preraring NodeBGP resource");
     let nb = test_node_bgp();
diff --git a/sartd/src/kubernetes/tests/agent_cni_server_test.rs b/sartd/src/kubernetes/tests/agent_cni_server_test.rs
index 65d6e40..69509c3 100644
--- a/sartd/src/kubernetes/tests/agent_cni_server_test.rs
+++ b/sartd/src/kubernetes/tests/agent_cni_server_test.rs
@@ -1,4 +1,4 @@
-use std::{net::IpAddr, str::FromStr, sync::Arc, time::Duration};
+use std::{net::IpAddr, str::FromStr, sync::{Arc, Mutex}, time::Duration};
 
 use crate::common::{setup_kind, TestContainer, TestRoutingRule};
 
diff --git a/sartd/src/kubernetes/tests/agent_node_bgp_test.rs b/sartd/src/kubernetes/tests/agent_node_bgp_test.rs
index 819e8cc..4735fa2 100644
--- a/sartd/src/kubernetes/tests/agent_node_bgp_test.rs
+++ b/sartd/src/kubernetes/tests/agent_node_bgp_test.rs
@@ -1,12 +1,11 @@
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use kube::{
     api::{DeleteParams, Patch, PatchParams},
     Api, Client, ResourceExt,
 };
 use sartd_kubernetes::{
-    agent,
-    context::State,
+    agent::{self, context::State, metrics::Metrics},
     crd::{
         bgp_peer::BGPPeer,
         node_bgp::{NodeBGP, NodeBGPCondition, NodeBGPConditionReason, NodeBGPConditionStatus},
@@ -33,7 +32,7 @@ async fn integration_test_agent_node_bgp() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let nb = test_node_bgp();
     let nb_api = Api::<NodeBGP>::all(ctx.client.clone());
diff --git a/sartd/src/kubernetes/tests/controller_address_block_test.rs b/sartd/src/kubernetes/tests/controller_address_block_test.rs
index 4f1058c..d3fde6b 100644
--- a/sartd/src/kubernetes/tests/controller_address_block_test.rs
+++ b/sartd/src/kubernetes/tests/controller_address_block_test.rs
@@ -1,4 +1,4 @@
-use std::{net::IpAddr, str::FromStr, sync::Arc};
+use std::{net::IpAddr, str::FromStr, sync::{Arc, Mutex}};
 
 use kube::{
     api::{DeleteParams, Patch, PatchParams},
@@ -6,8 +6,12 @@ use kube::{
 };
 use sartd_ipam::manager::{AllocatorSet, BlockAllocator};
 use sartd_kubernetes::{
-    context::{Ctx, State},
-    controller::{self, reconciler::address_block::ControllerAddressBlockContext},
+    controller::{
+        self,
+        context::{Ctx, State},
+        metrics::Metrics,
+        reconciler::address_block::ControllerAddressBlockContext,
+    },
     crd::address_block::AddressBlock,
     fixture::{
         reconciler::{test_address_block_lb, test_address_block_lb_non_default},
@@ -32,7 +36,7 @@ async fn integration_test_address_block() {
     let allocator_set = Arc::new(AllocatorSet::new());
     let block_allocator = Arc::new(BlockAllocator::default());
 
-    let ab_ctx = ControllerAddressBlockContext{
+    let ab_ctx = ControllerAddressBlockContext {
         allocator_set: allocator_set.clone(),
         block_allocator: block_allocator.clone(),
     };
@@ -40,6 +44,7 @@
         client.clone(),
         30,
         ab_ctx,
+        Arc::new(Mutex::new(Metrics::default())),
     );
 
     tracing::info!("Creating an AddressBlock resource");
diff --git a/sartd/src/kubernetes/tests/controller_address_pool_pod_test.rs b/sartd/src/kubernetes/tests/controller_address_pool_pod_test.rs
index bae602c..650af87 100644
--- a/sartd/src/kubernetes/tests/controller_address_pool_pod_test.rs
+++ b/sartd/src/kubernetes/tests/controller_address_pool_pod_test.rs
@@ -1,5 +1,4 @@
-use std::sync::Arc;
-
+use std::sync::{Arc, Mutex};
 
 use common::{cleanup_kind, setup_kind};
 
@@ -9,8 +8,11 @@ use kube::{
     Api, Client, ResourceExt,
 };
 use sartd_ipam::manager::BlockAllocator;
 use sartd_kubernetes::{
-    context::{State, Ctx},
-    controller,
+    controller::{
+        self,
+        context::{Ctx, State},
+        metrics::Metrics,
+    },
     crd::{
         address_block::AddressBlock,
         address_pool::{AddressPool, ADDRESS_POOL_ANNOTATION},
@@ -35,7 +37,12 @@ async fn test_address_pool_pod_handling_request() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
     let block_allocator = Arc::new(BlockAllocator::default());
-    let ctx = State::default().to_context_with(client.clone(), 30, block_allocator);
+    let ctx = State::default().to_context_with(
+        client.clone(),
+        30,
+        block_allocator,
+        Arc::new(Mutex::new(Metrics::default())),
+    );
 
     let ap = test_address_pool_pod();
 
@@ -71,7 +78,6 @@
     let ab_api = Api::<AddressBlock>::all(ctx.client().clone());
 
-
     tracing::info!("Changing auto assign to false");
     applied_ap.spec.auto_assign = Some(false);
 
diff --git a/sartd/src/kubernetes/tests/controller_address_pool_service_test.rs b/sartd/src/kubernetes/tests/controller_address_pool_service_test.rs
index e3adbd1..61e01f5 100644
--- a/sartd/src/kubernetes/tests/controller_address_pool_service_test.rs
+++ b/sartd/src/kubernetes/tests/controller_address_pool_service_test.rs
@@ -1,5 +1,4 @@
-use std::sync::Arc;
-use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
 
 use common::{cleanup_kind, setup_kind};
 
@@ -9,8 +8,11 @@ use kube::{
     Api, Client, ResourceExt,
 };
 use sartd_ipam::manager::BlockAllocator;
 use sartd_kubernetes::{
-    context::{State, Ctx},
-    controller,
+    controller::{
+        self,
+        context::{Ctx, State},
+        metrics::Metrics,
+    },
     crd::{address_block::AddressBlock, address_pool::AddressPool},
     fixture::{reconciler::test_address_pool_lb, test_trace},
 };
@@ -28,7 +30,12 @@ async fn integration_test_address_pool() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
     let block_allocator = Arc::new(BlockAllocator::default());
-    let ctx = State::default().to_context_with(client.clone(), 30, block_allocator);
+    let ctx = State::default().to_context_with(
+        client.clone(),
+        30,
+        block_allocator,
+        Arc::new(Mutex::new(Metrics::default())),
+    );
 
     let ap = test_address_pool_lb();
 
diff --git a/sartd/src/kubernetes/tests/controller_bgp_advertisement_test.rs b/sartd/src/kubernetes/tests/controller_bgp_advertisement_test.rs
index 1414d29..b7a9374 100644
--- a/sartd/src/kubernetes/tests/controller_bgp_advertisement_test.rs
+++ b/sartd/src/kubernetes/tests/controller_bgp_advertisement_test.rs
@@ -1,12 +1,11 @@
-use std::{collections::BTreeMap, sync::Arc};
+use std::{collections::BTreeMap, sync::{Arc, Mutex}};
 
 use kube::{
     api::{DeleteParams, Patch, PatchParams},
     Api, Client, ResourceExt,
 };
 use sartd_kubernetes::{
-    context::State,
-    controller,
+    controller::{self, context::State, metrics::Metrics},
     crd::bgp_advertisement::{AdvertiseStatus, BGPAdvertisement, BGPAdvertisementStatus},
     fixture::{reconciler::test_bgp_advertisement_svc, test_trace},
     util::get_namespace,
@@ -26,7 +25,7 @@ async fn integration_test_controller_bgp_advertisement() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let mut ba = test_bgp_advertisement_svc();
     ba.status = Some(BGPAdvertisementStatus {
diff --git a/sartd/src/kubernetes/tests/controller_block_request_test.rs b/sartd/src/kubernetes/tests/controller_block_request_test.rs
index a328b47..d656dae 100644
--- a/sartd/src/kubernetes/tests/controller_block_request_test.rs
+++ b/sartd/src/kubernetes/tests/controller_block_request_test.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use kube::{
     api::{Patch, PatchParams},
@@ -6,8 +6,11 @@ use kube::{
 };
 use sartd_ipam::manager::BlockAllocator;
 use sartd_kubernetes::{
-    context::{Ctx, State},
-    controller,
+    controller::{
+        self,
+        context::{Ctx, State},
+        metrics::Metrics,
+    },
     crd::{address_pool::AddressPool, block_request::BlockRequest},
     fixture::{
         reconciler::{test_address_pool_pod, test_block_request},
@@ -32,7 +35,12 @@
     let block_allocator = Arc::new(BlockAllocator::default());
 
-    let ctx = State::default().to_context_with(client.clone(), 30, block_allocator);
+    let ctx = State::default().to_context_with(
+        client.clone(),
+        30,
+        block_allocator,
+        Arc::new(Mutex::new(Metrics::default())),
+    );
 
     tracing::info!("Creating AddressPool");
     let ap = test_address_pool_pod();
 
diff --git a/sartd/src/kubernetes/tests/controller_cluster_bgp_test.rs b/sartd/src/kubernetes/tests/controller_cluster_bgp_test.rs
index eed7cca..1fa65c2 100644
--- a/sartd/src/kubernetes/tests/controller_cluster_bgp_test.rs
+++ b/sartd/src/kubernetes/tests/controller_cluster_bgp_test.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use common::{cleanup_kind, setup_kind};
 use k8s_openapi::api::core::v1::Node;
@@ -9,11 +9,12 @@ use kube::{
     api::{DeleteParams, ListParams, Patch, PatchParams},
     Api, Client,
 };
 use sartd_kubernetes::controller;
+use sartd_kubernetes::controller::context::State;
+use sartd_kubernetes::controller::metrics::Metrics;
 use sartd_kubernetes::crd::cluster_bgp::{AsnSelectionType, AsnSelector, ASN_LABEL};
 use sartd_kubernetes::crd::node_bgp::NodeBGP;
 use sartd_kubernetes::fixture::test_trace;
 use sartd_kubernetes::{
-    context::State,
     crd::{bgp_peer_template::BGPPeerTemplate, cluster_bgp::ClusterBGP},
     fixture::reconciler::{test_bgp_peer_tmpl, test_cluster_bgp},
 };
@@ -30,7 +31,7 @@ async fn integration_test_cluster_bgp_asn() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let cb = test_cluster_bgp();
     let cb_api = Api::<ClusterBGP>::all(ctx.client.clone());
@@ -82,7 +83,7 @@ async fn integration_test_cluster_bgp_asn() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let mut cb = test_cluster_bgp();
     cb.spec.asn_selector = AsnSelector {
diff --git a/sartd/src/kubernetes/tests/controller_endpointslice_watcher_test.rs b/sartd/src/kubernetes/tests/controller_endpointslice_watcher_test.rs
index 1590707..f31e5ba 100644
--- a/sartd/src/kubernetes/tests/controller_endpointslice_watcher_test.rs
+++ b/sartd/src/kubernetes/tests/controller_endpointslice_watcher_test.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use k8s_openapi::api::{
     core::v1::Service,
@@ -9,8 +9,12 @@ use kube::{
     Api, Client, ResourceExt,
 };
 use sartd_kubernetes::{
-    context::{Context, State},
-    controller::{self, reconciler::service_watcher::SERVICE_ETP_ANNOTATION},
+    controller::{
+        self,
+        context::{Context, State},
+        metrics::Metrics,
+        reconciler::service_watcher::SERVICE_ETP_ANNOTATION,
+    },
     crd::{
         bgp_advertisement::{AdvertiseStatus, BGPAdvertisement},
         bgp_peer::BGPPeer,
@@ -37,7 +41,7 @@ async fn integration_test_endpointslice_watcher() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let eps = test_eps();
 
diff --git a/sartd/src/kubernetes/tests/controller_service_watcher_test.rs b/sartd/src/kubernetes/tests/controller_service_watcher_test.rs
index 5c73222..58662e6 100644
--- a/sartd/src/kubernetes/tests/controller_service_watcher_test.rs
+++ b/sartd/src/kubernetes/tests/controller_service_watcher_test.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, net::IpAddr, str::FromStr, sync::Arc};
+use std::{collections::HashMap, net::IpAddr, str::FromStr, sync::{Arc, Mutex}};
 
 use common::{cleanup_kind, setup_kind};
 
@@ -10,9 +10,10 @@ use kube::{
 };
 use sartd_ipam::manager::{AllocatorSet, Block};
 use sartd_kubernetes::{
-    context::{Ctx, State},
     controller::{
         self,
+        context::{Ctx, State},
+        metrics::Metrics,
         reconciler::service_watcher::{get_allocated_lb_addrs, RELEASE_ANNOTATION},
     },
     crd::address_pool::{ADDRESS_POOL_ANNOTATION, LOADBALANCER_ADDRESS_ANNOTATION},
@@ -40,6 +41,7 @@
         client.clone(),
         30,
         allocator_set.clone(),
+        Arc::new(Mutex::new(Metrics::default())),
     );
 
     let pool_name = "test-pool";
diff --git a/sartd/src/kubernetes/tests/node_watcher_test.rs b/sartd/src/kubernetes/tests/node_watcher_test.rs
index 0db1823..2176f46 100644
--- a/sartd/src/kubernetes/tests/node_watcher_test.rs
+++ b/sartd/src/kubernetes/tests/node_watcher_test.rs
@@ -1,4 +1,4 @@
-use std::{collections::BTreeMap, sync::Arc};
+use std::{collections::BTreeMap, sync::{Arc, Mutex}};
 
 use k8s_openapi::api::core::v1::Node;
 use kube::{
@@ -6,8 +6,9 @@
     Api, Client, ResourceExt,
 };
 use sartd_kubernetes::{
-    context::State,
-    controller::{self, reconciler::node_watcher::NODE_FINALIZER},
+    controller::{
+        self, context::State, metrics::Metrics, reconciler::node_watcher::NODE_FINALIZER,
+    },
     crd::{
         bgp_peer_template::BGPPeerTemplate,
         cluster_bgp::{ClusterBGP, ClusterBGPStatus},
@@ -35,7 +36,7 @@ async fn integration_test_controller_node_watcher() {
     tracing::info!("Getting kube client");
     let client = Client::try_default().await.unwrap();
 
-    let ctx = State::default().to_context(client.clone(), 30);
+    let ctx = State::default().to_context(client.clone(), 30, Arc::new(Mutex::new(Metrics::default())));
 
     let mut cb = test_cluster_bgp();
     cb.spec.node_selector = Some(BTreeMap::from([("bgp".to_string(), "a".to_string())]));
diff --git a/sartd/src/proto/src/sart.v1.rs b/sartd/src/proto/src/sart.v1.rs
index aec10d7..c219187 100644
--- a/sartd/src/proto/src/sart.v1.rs
+++ b/sartd/src/proto/src/sart.v1.rs
@@ -1,4 +1,3 @@
-// This file is @generated by prost-build.
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct HealthRequest {}
diff --git a/sartd/src/trace/Cargo.lock b/sartd/src/trace/Cargo.lock
index 6006cbd..784fa63 100644
--- a/sartd/src/trace/Cargo.lock
+++ b/sartd/src/trace/Cargo.lock
@@ -1087,6 +1087,7 @@ dependencies = [
  "kube",
  "opentelemetry",
  "prometheus",
+ "rand",
  "tokio",
  "tracing",
  "tracing-opentelemetry",
diff --git a/sartd/src/trace/Cargo.toml b/sartd/src/trace/Cargo.toml
index d42d7a3..a9eaca3 100644
--- a/sartd/src/trace/Cargo.toml
+++ b/sartd/src/trace/Cargo.toml
@@ -8,6 +8,7 @@ k8s-openapi = { version = "0.20.0", features = ["v1_28"] }
 kube = { version = "0.87.2", features = [] }
 opentelemetry = { version = "0.21.0", features = ["trace"] }
 prometheus = "0.13.3"
+rand = "0.8.5"
 tokio = { version = "1.35.1", features = ["time"] }
 tracing = "0.1.40"
 tracing-opentelemetry = "0.22.0"
diff --git a/sartd/src/trace/src/telemetry.rs b/sartd/src/trace/src/telemetry.rs
index b6bac92..163b12f 100644
--- a/sartd/src/trace/src/telemetry.rs
+++ b/sartd/src/trace/src/telemetry.rs
@@ -1,16 +1,12 @@
 use opentelemetry::trace::TraceId;
+use rand::Rng;
 use tracing_subscriber::{prelude::*, Registry};
 
-/// Fetch an opentelemetry::trace::TraceId as hex through the full tracing stack
+/// Generate a random opentelemetry::trace::TraceId
 pub fn get_trace_id() -> TraceId {
-    use opentelemetry::trace::TraceContextExt as _; // opentelemetry::Context -> opentelemetry::trace::Span
-    use tracing_opentelemetry::OpenTelemetrySpanExt as _; // tracing::Span to opentelemetry::Context
-
-    tracing::Span::current()
-        .context()
-        .span()
-        .span_context()
-        .trace_id()
+    let mut rng = rand::thread_rng();
+    let val: u128 = rng.gen();
+    TraceId::from(val)
 }
 
 pub async fn init(level: tracing::Level) {