diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index efd7f3c042..7904526440 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "runtime" - "strings" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -16,6 +15,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/config" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" + "github.com/nginxinc/nginx-gateway-fabric/pkg/telemetry" ) //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . GraphGetter @@ -57,9 +57,9 @@ type Data struct { Arch string DeploymentID string ImageSource string - Flags config.Flags K8sVersion string K8sPlatform string + Flags config.Flags NGFResourceCounts NGFResourceCounts NodeCount int NGFReplicaCount int @@ -99,19 +99,12 @@ func NewDataCollectorImpl( // Collect collects and returns telemetry Data. func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { - nodeCount, err := CollectNodeCount(ctx, c.cfg.K8sClientReader) - if err != nil { - return Data{}, fmt.Errorf("failed to collect node count: %w", err) - } - - nodes, err := collectNodeList(ctx, c.cfg.K8sClientReader) + clusterInfo, err := collectClusterInformation(ctx, c.cfg.K8sClientReader) if err != nil { return Data{}, err } - node := nodes.Items[0] - k8sVersion := node.Status.NodeInfo.KubeletVersion - k8sPlatform := strings.Split(node.Spec.ProviderID, "://")[0] + nodeCount := CollectNodeCount(clusterInfo.Nodes) graphResourceCount, err := collectGraphResourceCount(c.cfg.GraphGetter, c.cfg.ConfigurationGetter) if err != nil { @@ -133,11 +126,6 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { return Data{}, fmt.Errorf("failed to get NGF deploymentID: %w", err) } - var clusterID string - if clusterID, err = CollectClusterID(ctx, c.cfg.K8sClientReader); err != nil { - return Data{}, fmt.Errorf("failed to collect clusterID: %w", err) - } - data := Data{ NodeCount: nodeCount, NGFResourceCounts: graphResourceCount, @@ -146,26 +134,21 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { Version: c.cfg.Version, }, NGFReplicaCount: replicaCount, - ClusterID: clusterID, + ClusterID: clusterInfo.ClusterID, ImageSource: c.cfg.ImageSource, Arch: runtime.GOARCH, DeploymentID: deploymentID, Flags: c.cfg.Flags, - K8sVersion: k8sVersion, - K8sPlatform: k8sPlatform, + K8sVersion: clusterInfo.Version, + K8sPlatform: clusterInfo.Platform, } return data, nil } // CollectNodeCount returns the number of nodes in the cluster. -func CollectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) { - var nodes v1.NodeList - if err := k8sClient.List(ctx, &nodes); err != nil { - return 0, fmt.Errorf("failed to get NodeList: %w", err) - } - - return len(nodes.Items), nil +func CollectNodeCount(nodes v1.NodeList) int { + return len(nodes.Items) } func collectGraphResourceCount( @@ -278,7 +261,7 @@ func CollectClusterID(ctx context.Context, k8sClient client.Reader) (string, err return string(kubeNamespace.GetUID()), nil } -func collectNodeList(ctx context.Context, k8sClient client.Reader) (v1.NodeList, error) { +func CollectNodeList(ctx context.Context, k8sClient client.Reader) (v1.NodeList, error) { var nodes v1.NodeList if err := k8sClient.List(ctx, &nodes); err != nil { return nodes, fmt.Errorf("failed to get NodeList: %w", err) @@ -286,3 +269,45 @@ func collectNodeList(ctx context.Context, k8sClient client.Reader) (v1.NodeList, return nodes, nil } + +type clusterInformation struct { + Platform string + Version string + ClusterID string + Nodes v1.NodeList +} + +func collectClusterInformation(ctx context.Context, k8sClient client.Reader) (clusterInformation, error) { + var clusterInfo clusterInformation + + nodes, err := CollectNodeList(ctx, k8sClient) + if err != nil { + return clusterInformation{}, fmt.Errorf("failed to collect cluster information: %w", err) + } + if len(nodes.Items) == 0 { + return clusterInformation{}, errors.New("failed to collect cluster information: NodeList length is zero") + } + + clusterInfo.Nodes = nodes + + var clusterID string + if clusterID, err = CollectClusterID(ctx, k8sClient); err != nil { + return clusterInformation{}, fmt.Errorf("failed to collect cluster information: %w", err) + } + clusterInfo.ClusterID = clusterID + + node := nodes.Items[0] + clusterInfo.Version = node.Status.NodeInfo.KubeletVersion + if clusterInfo.Version == "" { + clusterInfo.Version = "unknown" + } + + var namespaces v1.NamespaceList + if err = k8sClient.List(ctx, &namespaces); err != nil { + return clusterInformation{}, fmt.Errorf("failed to collect cluster information: %w", err) + } + + clusterInfo.Platform = telemetry.CollectK8sPlatform(node, namespaces) + + return clusterInfo, nil +} diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 271bbc0ca8..fc1f837f24 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -3,7 +3,6 @@ package telemetry_test import ( "context" "errors" - "fmt" "reflect" "runtime" @@ -25,20 +24,23 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) -func createListCallsFunc(nodes []v1.Node) func( - ctx context.Context, - list client.ObjectList, - option ...client.ListOption, -) error { - return func(_ context.Context, list client.ObjectList, option ...client.ListOption) error { +type listCallsFunc = func( + context.Context, + client.ObjectList, + ...client.ListOption, +) error + +func createListCallsFunc(objects ...client.ObjectList) listCallsFunc { + return func(_ context.Context, object client.ObjectList, option ...client.ListOption) error { Expect(option).To(BeEmpty()) - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = append(typedList.Items, nodes...) - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) + for _, obj := range objects { + if reflect.TypeOf(obj) == reflect.TypeOf(object) { + reflect.ValueOf(object).Elem().Set(reflect.ValueOf(obj).Elem()) + return nil + } } + return nil } } @@ -79,7 +81,9 @@ var _ = Describe("Collector", Ordered, func() { ngfReplicaSet *appsv1.ReplicaSet kubeNamespace *v1.Namespace baseGetCalls getCallsFunc + baseListCalls listCallsFunc flags config.Flags + nodeList *v1.NodeList ) BeforeAll(func() { @@ -131,12 +135,30 @@ var _ = Describe("Collector", Ordered, func() { Names: []string{"boolFlag", "intFlag", "stringFlag"}, Values: []string{"false", "default", "user-defined"}, } + + nodeList = &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "k3s://ip-172-16-0-210", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.28.6+k3s2", + }, + }, + }, + }, + } }) BeforeEach(func() { expData = telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 0, + NodeCount: 1, NGFResourceCounts: telemetry.NGFResourceCounts{}, NGFReplicaCount: 1, ClusterID: string(kubeNamespace.GetUID()), @@ -144,6 +166,8 @@ var _ = Describe("Collector", Ordered, func() { Arch: runtime.GOARCH, DeploymentID: string(ngfReplicaSet.ObjectMeta.OwnerReferences[0].UID), Flags: flags, + K8sPlatform: "k3s", + K8sVersion: "v1.28.6+k3s2", } k8sClientReader = &eventsfakes.FakeReader{} @@ -165,6 +189,9 @@ var _ = Describe("Collector", Ordered, func() { baseGetCalls = createGetCallsFunc(ngfPod, ngfReplicaSet, kubeNamespace) k8sClientReader.GetCalls(baseGetCalls) + + baseListCalls = createListCallsFunc(nodeList) + k8sClientReader.ListCalls(baseListCalls) }) mergeGetCallsWithBase := func(f getCallsFunc) getCallsFunc { @@ -184,20 +211,30 @@ var _ = Describe("Collector", Ordered, func() { Describe("Normal case", func() { When("collecting telemetry data", func() { It("collects all fields", func() { - nodes := []v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "kind://docker/kind/kind-control-plane", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node3", + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, }, }, } @@ -287,6 +324,8 @@ var _ = Describe("Collector", Ordered, func() { Services: 3, Endpoints: 4, } + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "kind" data, err := dataCollector.Collect(ctx) @@ -296,31 +335,61 @@ var _ = Describe("Collector", Ordered, func() { }) }) - Describe("clusterID collector", func() { - When("collecting clusterID", func() { + Describe("cluster information collector", func() { + When("collecting node count data", func() { + It("collects correct data for one node", func() { + k8sClientReader.ListCalls(createListCallsFunc(nodeList)) + + expData.NodeCount = 1 + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + When("it encounters an error while collecting data", func() { - It("should error if the kubernetes client errored when getting the namespace", func() { - expectedError := errors.New("there was an error getting clusterID") - k8sClientReader.GetCalls(mergeGetCallsWithBase( - func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error { - switch object.(type) { - case *v1.Namespace: - return expectedError - } - return nil - })) + It("should error when there are no nodes", func() { + expectedError := errors.New("failed to collect cluster information: NodeList length is zero") + k8sClientReader.ListCalls(createListCallsFunc(nil)) + + _, err := dataCollector.Collect(ctx) + + Expect(err).To(MatchError(expectedError)) + }) + It("should error on kubernetes client api errors", func() { + expectedError := errors.New("there was an error getting NodeList") + k8sClientReader.ListReturns(expectedError) _, err := dataCollector.Collect(ctx) Expect(err).To(MatchError(expectedError)) }) }) }) - }) - Describe("node count collector", func() { - When("collecting node count data", func() { - It("collects correct data for no nodes", func() { - k8sClientReader.ListCalls(createListCallsFunc(nil)) + When("collecting cluster platform data", func() { + It("collects Kind platform", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "kind://docker/kind/kind-control-plane", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "kind" data, err := dataCollector.Collect(ctx) @@ -328,30 +397,220 @@ var _ = Describe("Collector", Ordered, func() { Expect(expData).To(Equal(data)) }) - It("collects correct data for one node", func() { - nodes := []v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + It("collects GKE platform", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "gce://test-data/us-central1-c/test-data", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, }, } k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "gke" - expData.NodeCount = 1 + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + + It("collects AKS platform", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "azure://test-data/us-central1-c/test-data", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "aks" + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + + It("collects EKS platform", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws://test-data/us-central1-c/test-data", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "eks" + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + + It("collects Rancher platform", func() { + namespaceList := &v1.NamespaceList{ + Items: []v1.Namespace{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cattle-system", + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodeList, namespaceList)) + + expData.K8sVersion = "v1.28.6+k3s2" + expData.K8sPlatform = "rancher" data, err := dataCollector.Collect(ctx) Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) + + It("collects Openshift platform", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{"node.openshift.io/os_id": "test"}, + }, + Spec: v1.NodeSpec{ + ProviderID: "k3s://test-data/us-central1-c/test-data", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "openshift" + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + + When("platform is none of the above", func() { + It("marks the platform as 'other'", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "other-cloud-provider", + }, + Status: v1.NodeStatus{ + NodeInfo: v1.NodeSystemInfo{ + KubeletVersion: "v1.29.2", + }, + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "v1.29.2" + expData.K8sPlatform = "other" + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + }) + }) + When("collecting cluster version data", func() { + When("the kublet version is missing", func() { + It("should be report 'unknown'", func() { + nodes := &v1.NodeList{ + Items: []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + ProviderID: "k3s://ip-172-16-0-210", + }, + }, + }, + } + + k8sClientReader.ListCalls(createListCallsFunc(nodes)) + expData.K8sVersion = "unknown" + expData.K8sPlatform = "k3s" + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + }) }) - When("it encounters an error while collecting data", func() { - It("should error on kubernetes client api errors", func() { - expectedError := errors.New("there was an error getting NodeList") - k8sClientReader.ListReturns(expectedError) - _, err := dataCollector.Collect(ctx) - Expect(err).To(MatchError(expectedError)) + When("collecting clusterID data", func() { + When("it encounters an error while collecting data", func() { + It("should error if the kubernetes client errored when getting the namespace", func() { + expectedError := errors.New("there was an error getting clusterID") + k8sClientReader.GetCalls(mergeGetCallsWithBase( + func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error { + switch object.(type) { + case *v1.Namespace: + return expectedError + } + return nil + })) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedError)) + }) }) }) }) diff --git a/internal/mode/static/usage/job_worker.go b/internal/mode/static/usage/job_worker.go index 3f039181e3..4fcb5b61cb 100644 --- a/internal/mode/static/usage/job_worker.go +++ b/internal/mode/static/usage/job_worker.go @@ -19,11 +19,13 @@ func CreateUsageJobWorker( cfg config.Config, ) func(ctx context.Context) { return func(ctx context.Context) { - nodeCount, err := telemetry.CollectNodeCount(ctx, k8sClient) + nodelist, err := telemetry.CollectNodeList(ctx, k8sClient) if err != nil { - logger.Error(err, "Failed to collect node count") + logger.Error(err, "Failed to collect nodes") } + nodeCount := telemetry.CollectNodeCount(nodelist) + podCount, err := GetTotalNGFPodCount(ctx, k8sClient) if err != nil { logger.Error(err, "Failed to collect replica count") diff --git a/pkg/telemetry/platform.go b/pkg/telemetry/platform.go new file mode 100644 index 0000000000..a6f5836628 --- /dev/null +++ b/pkg/telemetry/platform.go @@ -0,0 +1,100 @@ +package telemetry + +import ( + "strings" + + v1 "k8s.io/api/core/v1" +) + +const ( + openshiftIdentifier = "node.openshift.io/os_id" + k3sIdentifier = "k3s" + awsIdentifier = "aws" + gkeIdentifier = "gce" + azureIdentifier = "azure" + kindIdentifier = "kind" + rancherIdentifier = "cattle-system" +) + +func CollectK8sPlatform(node v1.Node, namespaces v1.NamespaceList) string { + if result := isMultiplePlatforms(node, namespaces); result != "" { + return result + } + + if isAWSPlatform(node) { + return "eks" + } + if isGKEPlatform(node) { + return "gke" + } + if isAzurePlatform(node) { + return "aks" + } + if isKindPlatform(node) { + return "kind" + } + if isK3SPlatform(node) { + return "k3s" + } + + return "other" +} + +// isMultiplePlatforms checks for platforms that run on other platforms. e.g. Rancher on K3s. +func isMultiplePlatforms(node v1.Node, namespaces v1.NamespaceList) string { + if isRancherPlatform(namespaces) { + return "rancher" + } + + if isOpenshiftPlatform(node) { + return "openshift" + } + + return "" +} + +// For each of these, if we want to we can do both check the providerID AND check labels/annotations, +// I'm not too sure why we would want to do BOTH. +// +// I think doing both would add a greater certainty of a specific platform, however will potentially add to upkeep +// where if either the label/annotation or providerID changes it will mess this up and may group more clusters in +// the "Other" platform if they messed with any of the node labels/annotations. + +// I think it will be fine just to do the providerID check as + +func isOpenshiftPlatform(node v1.Node) bool { + // openshift platform won't show up in node's ProviderID + value, ok := node.Labels[openshiftIdentifier] + + return ok && value != "" +} + +func isK3SPlatform(node v1.Node) bool { + return strings.HasPrefix(node.Spec.ProviderID, k3sIdentifier) +} + +func isAWSPlatform(node v1.Node) bool { + return strings.HasPrefix(node.Spec.ProviderID, awsIdentifier) +} + +func isGKEPlatform(node v1.Node) bool { + return strings.HasPrefix(node.Spec.ProviderID, gkeIdentifier) +} + +func isAzurePlatform(node v1.Node) bool { + return strings.HasPrefix(node.Spec.ProviderID, azureIdentifier) +} + +func isKindPlatform(node v1.Node) bool { + return strings.HasPrefix(node.Spec.ProviderID, kindIdentifier) +} + +func isRancherPlatform(namespaces v1.NamespaceList) bool { + // rancher platform won't show up in the node's ProviderID + for _, ns := range namespaces.Items { + if ns.Name == rancherIdentifier { + return true + } + } + return false +}