make time sync e2e test more reliable for dynamic infra
devigned committed May 6, 2021
1 parent 294c81d · commit 7d0045d
Showing 3 changed files with 127 additions and 43 deletions.
test/e2e/azure_timesync.go: 70 changes (40 additions & 30 deletions)
@@ -22,8 +22,10 @@ import (
     "context"
     "fmt"
     "strings"
+    "time"

     . "github.com/onsi/gomega"
+    "github.com/pkg/errors"
     corev1 "k8s.io/api/core/v1"
     "sigs.k8s.io/cluster-api/test/framework"
     kinderrors "sigs.k8s.io/kind/pkg/errors"
@@ -42,6 +44,7 @@ func AzureTimeSyncSpec(ctx context.Context, inputGetter func() AzureTimeSyncSpec
     var (
         specName = "azure-timesync"
         input    AzureTimeSyncSpecInput
+        thirtySeconds = 30*time.Second
     )

     input = inputGetter()
@@ -50,40 +53,47 @@ func AzureTimeSyncSpec(ctx context.Context, inputGetter func() AzureTimeSyncSpec
     namespace, name := input.Namespace.Name, input.ClusterName
     managementClusterClient := input.BootstrapClusterProxy.GetClient()

-    sshInfo, err := getClusterSSHInfo(ctx, managementClusterClient, namespace, name)
-    Expect(err).NotTo(HaveOccurred())
-    Expect(len(sshInfo)).To(BeNumerically(">", 0))
+    Eventually(func() error {
+        sshInfo, err := getClusterSSHInfo(ctx, managementClusterClient, namespace, name)
+        if err != nil {
+            return err
+        }

-    testfuncs := []func() error{}
-    for _, s := range sshInfo {
-        Byf("checking that time synchronization is healthy on %s", s.Hostname)
+        if len(sshInfo) <= 0 {
+            return errors.New("sshInfo did not contain any machines")
+        }

-        execToStringFn := func(expected, command string, args ...string) func() error {
-            // don't assert in this test func, just return errors
-            return func() error {
-                f := &strings.Builder{}
-                if err := execOnHost(s.Endpoint, s.Hostname, s.Port, f, command, args...); err != nil {
-                    return err
-                }
-                if !strings.Contains(f.String(), expected) {
-                    return fmt.Errorf("expected \"%s\" in command output:\n%s", expected, f.String())
-                }
-                return nil
-            }
-        }
+        var testFuncs []func() error
+        for _, s := range sshInfo {
+            Byf("checking that time synchronization is healthy on %s", s.Hostname)
+
+            execToStringFn := func(expected, command string, args ...string) func() error {
+                // don't assert in this test func, just return errors
+                return func() error {
+                    f := &strings.Builder{}
+                    if err := execOnHost(s.Endpoint, s.Hostname, s.Port, f, command, args...); err != nil {
+                        return err
+                    }
+                    if !strings.Contains(f.String(), expected) {
+                        return fmt.Errorf("expected \"%s\" in command output:\n%s", expected, f.String())
+                    }
+                    return nil
+                }
+            }

-        testfuncs = append(testfuncs,
-            execToStringFn(
-                "✓ chronyd is active",
-                "systemctl", "is-active", "chronyd", "&&",
-                "echo", "✓ chronyd is active",
-            ),
-            execToStringFn(
-                "Reference ID",
-                "chronyc", "tracking",
-            ),
-        )
-    }
+            testFuncs = append(testFuncs,
+                execToStringFn(
+                    "✓ chronyd is active",
+                    "systemctl", "is-active", "chronyd", "&&",
+                    "echo", "✓ chronyd is active",
+                ),
+                execToStringFn(
+                    "Reference ID",
+                    "chronyc", "tracking",
+                ),
+            )
+        }

-    Expect(kinderrors.AggregateConcurrent(testfuncs)).To(Succeed())
+        return kinderrors.AggregateConcurrent(testFuncs)
+    }, thirtySeconds, thirtySeconds).Should(Succeed())
 }
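A note on the pattern introduced above: Gomega's Eventually takes the polled function followed by a timeout and a polling interval, which is why thirtySeconds is passed twice. Below is a minimal, self-contained sketch of the same retry shape (a hypothetical test, not part of this commit):

    package e2e_test

    import (
        "errors"
        "testing"
        "time"

        . "github.com/onsi/gomega"
    )

    // TestEventuallyRetries demonstrates Eventually(fn, timeout, interval):
    // fn is re-invoked until it returns nil (Succeed()) or the timeout
    // elapses, so a transient "infra not ready yet" state no longer fails
    // the spec outright.
    func TestEventuallyRetries(t *testing.T) {
        g := NewWithT(t)

        attempts := 0
        g.Eventually(func() error {
            attempts++
            if attempts < 3 {
                return errors.New("nodes not ready yet") // transient failure
            }
            return nil // succeeds on the third poll
        }, 30*time.Second, 1*time.Second).Should(Succeed())
    }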
test/e2e/config/azure-dev.yaml: 6 changes (3 additions & 3 deletions)
@@ -51,6 +51,9 @@ providers:
     value: "${PWD}/config/default"
     files:
     - sourcePath: "../data/shared/v1alpha4_provider/metadata.yaml"
+    replacements:
+    - old: "--v=0"
+      new: "--v=4"
   files:
   - sourcePath: "../data/shared/v1alpha4/metadata.yaml"
   - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow.yaml"
@@ -81,9 +84,6 @@ providers:
     targetName: "cluster-template-machine-pool-windows.yaml"
   - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-external-cloud-provider.yaml"
     targetName: "cluster-template-external-cloud-provider.yaml"
-    replacements:
-    - old: "--v=0"
-      new: "--v=4"

 variables:
   KUBERNETES_VERSION: "${KUBERNETES_VERSION:-v1.19.7}"
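Net effect of the azure-dev.yaml change: the --v=0 to --v=4 log-verbosity replacement moves from the cluster-template-external-cloud-provider.yaml files entry up into the provider version's block, next to its metadata.yaml, so the higher verbosity appears to apply to the provider components clusterctl deploys rather than to a single test template. More verbose controller logs make flakes in the retried time-sync spec easier to diagnose.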
test/e2e/helpers.go: 94 changes (84 additions & 10 deletions)
@@ -41,17 +41,22 @@ import (
     appsv1 "k8s.io/api/apps/v1"
     batchv1 "k8s.io/api/batch/v1"
     corev1 "k8s.io/api/core/v1"
+    apierrors "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/client-go/kubernetes"
     typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1"
     typedbatchv1 "k8s.io/client-go/kubernetes/typed/batch/v1"
     typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
+    "k8s.io/client-go/tools/clientcmd"
     "sigs.k8s.io/cluster-api-provider-azure/azure"
+    "sigs.k8s.io/cluster-api-provider-azure/util/tele"
     clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
+    "sigs.k8s.io/cluster-api/controllers/noderefutil"
     clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1alpha4"
     "sigs.k8s.io/cluster-api/test/framework"
     "sigs.k8s.io/cluster-api/test/framework/kubernetesversions"
     "sigs.k8s.io/cluster-api/util"
+    utilkubeconfig "sigs.k8s.io/cluster-api/util/kubeconfig"
     "sigs.k8s.io/controller-runtime/pkg/client"
 )
@@ -304,18 +309,17 @@ type nodeSSHInfo struct {
 // getClusterSSHInfo returns the information needed to establish a SSH connection through a
 // control plane endpoint to each node in the cluster.
 func getClusterSSHInfo(ctx context.Context, c client.Client, namespace, name string) ([]nodeSSHInfo, error) {
-    sshInfo := []nodeSSHInfo{}
-
+    var sshInfo []nodeSSHInfo
     // Collect the info for each VM / Machine.
     machines, err := getMachinesInCluster(ctx, c, namespace, name)
     if err != nil {
-        return nil, err
+        return sshInfo, errors.Wrap(err, "failed to get machines in the cluster")
     }
     for i := range machines.Items {
         m := &machines.Items[i]
         cluster, err := util.GetClusterFromMetadata(ctx, c, m.ObjectMeta)
         if err != nil {
-            return nil, err
+            return nil, errors.Wrap(err, "failed to get cluster from metadata")
         }
         sshInfo = append(sshInfo, nodeSSHInfo{
             Endpoint: cluster.Spec.ControlPlaneEndpoint.Host,
@@ -327,28 +331,98 @@ func getClusterSSHInfo(ctx context.Context, c client.Client, namespace, name str
     // Collect the info for each instance in a VMSS / MachinePool.
     machinePools, err := getMachinePoolsInCluster(ctx, c, namespace, name)
     if err != nil {
-        return nil, err
+        return sshInfo, errors.Wrap(err, "failed to find machine pools in cluster")
     }

+    // make a workload client to access the workload cluster
+    workloadClient, err := getWorkloadClient(ctx, c, namespace, name)
+    if err != nil {
+        return sshInfo, errors.Wrap(err, "failed to get workload client")
+    }
+
     for i := range machinePools.Items {
         p := &machinePools.Items[i]
         cluster, err := util.GetClusterFromMetadata(ctx, c, p.ObjectMeta)
         if err != nil {
-            return nil, err
+            return sshInfo, errors.Wrap(err, "failed to get cluster from metadata")
         }
-        for j := range p.Status.NodeRefs {
-            n := p.Status.NodeRefs[j]
+
+        nodes, err := getReadyNodes(ctx, workloadClient, p.Status.NodeRefs)
+        if err != nil {
+            return sshInfo, errors.Wrap(err, "failed to get ready nodes")
+        }
+
+        if p.Spec.Replicas != nil && len(nodes) < int(*p.Spec.Replicas) {
+            message := fmt.Sprintf("machine pool %s/%s expected replicas %d, but only found %d ready nodes", p.Namespace, p.Name, *p.Spec.Replicas, len(nodes))
+            Log(message)
+            return sshInfo, errors.New(message)
+        }
+
+        for _, node := range nodes {
             sshInfo = append(sshInfo, nodeSSHInfo{
                 Endpoint: cluster.Spec.ControlPlaneEndpoint.Host,
-                Hostname: n.Name,
+                Hostname: node.Name,
                 Port:     e2eConfig.GetVariable(VMSSHPort),
             })
         }
+
     }

     return sshInfo, nil
 }

+func getReadyNodes(ctx context.Context, c client.Client, refs []corev1.ObjectReference) ([]corev1.Node, error) {
+    var nodes []corev1.Node
+    for _, ref := range refs {
+        var node corev1.Node
+        if err := c.Get(ctx, client.ObjectKey{
+            Namespace: ref.Namespace,
+            Name:      ref.Name,
+        }, &node); err != nil {
+            if apierrors.IsNotFound(err) {
+                // If 404, continue. Likely the node refs have not caught up to infra providers
+                continue
+            }
+
+            return nodes, err
+        }
+
+        if !noderefutil.IsNodeReady(&node) {
+            Logf("node is not ready and won't be counted for ssh info %s/%s", node.Namespace, node.Name)
+            continue
+        }
+
+        nodes = append(nodes, node)
+    }
+
+    return nodes, nil
+}
+
+func getWorkloadClient(ctx context.Context, c client.Client, namespace, clusterName string) (client.Client, error) {
+    ctx, span := tele.Tracer().Start(ctx, "scope.MachinePoolMachineScope.getWorkloadClient")
+    defer span.End()
+
+    obj := client.ObjectKey{
+        Namespace: namespace,
+        Name:      clusterName,
+    }
+    dataBytes, err := utilkubeconfig.FromSecret(ctx, c, obj)
+    if err != nil {
+        return nil, errors.Wrapf(err, "\"%s-kubeconfig\" not found in namespace %q", obj.Name, obj.Namespace)
+    }
+
+    cfg, err := clientcmd.Load(dataBytes)
+    if err != nil {
+        return nil, errors.Wrapf(err, "failed to load \"%s-kubeconfig\" in namespace %q", obj.Name, obj.Namespace)
+    }
+
+    restConfig, err := clientcmd.NewDefaultClientConfig(*cfg, &clientcmd.ConfigOverrides{}).ClientConfig()
+    if err != nil {
+        return nil, errors.Wrapf(err, "failed transform config \"%s-kubeconfig\" in namespace %q", obj.Name, obj.Namespace)
+    }
+
+    return client.New(restConfig, client.Options{})
+}
+
 // getMachinesInCluster returns a list of all machines in the given cluster.
 // This is adapted from CAPI's test/framework/cluster_proxy.go.
 func getMachinesInCluster(ctx context.Context, c framework.Lister, namespace, name string) (*clusterv1.MachineList, error) {
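The readiness gate in getReadyNodes leans on noderefutil.IsNodeReady. For readers who don't want to chase that import, a minimal equivalent of the check (a sketch, not the committed code) is:

    package e2e

    import corev1 "k8s.io/api/core/v1"

    // isNodeReady mirrors what noderefutil.IsNodeReady checks: a node
    // counts as ready only when its NodeReady condition is present and
    // reports True.
    func isNodeReady(node *corev1.Node) bool {
        for _, cond := range node.Status.Conditions {
            if cond.Type == corev1.NodeReady {
                return cond.Status == corev1.ConditionTrue
            }
        }
        return false
    }

Combined with the replica-count guard in getClusterSSHInfo, any MachinePool whose nodes have not all registered and gone Ready yields an error, which the Eventually wrapper in azure_timesync.go turns into a retry instead of an immediate failure.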
