Skip to content

Commit

Permalink
cni-server: set node NetworkUnavailable condition after join subnet g…
Browse files Browse the repository at this point in the history
…ateway check (#4915)

Signed-off-by: zhangzujian <[email protected]>
  • Loading branch information
zhangzujian authored Jan 9, 2025
1 parent a7fff99 commit e250afa
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 10 deletions.
1 change: 1 addition & 0 deletions charts/kube-ovn/templates/ovn-CR.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ rules:
- ovn-eips
- ovn-eips/status
- nodes
- nodes/status
- pods
verbs:
- get
Expand Down
1 change: 1 addition & 0 deletions dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3665,6 +3665,7 @@ rules:
- ovn-eips
- ovn-eips/status
- nodes
- nodes/status
- pods
verbs:
- get
Expand Down
2 changes: 1 addition & 1 deletion pkg/daemon/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func InitNodeGateway(config *Configuration) error {
klog.Errorf("failed to get ip %s with mask %s, %v", ip, joinCIDR, err)
return err
}
return configureNodeNic(portName, ipAddr, gw, joinCIDR, mac, config.MTU)
return configureNodeNic(config.KubeClient, config.NodeName, portName, ipAddr, gw, joinCIDR, mac, config.MTU)
}

func InitMirror(config *Configuration) error {
Expand Down
45 changes: 40 additions & 5 deletions pkg/daemon/ovs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ import (
sriovutilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"

kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
Expand Down Expand Up @@ -591,7 +593,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool
return nil
}

func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
ipStr := util.GetIPWithoutMask(ip)
raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--",
"set", "interface", util.NodeNic, "type=internal", "--",
Expand Down Expand Up @@ -670,11 +672,20 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd

// ping ovn0 gw to activate the flow
klog.Infof("wait ovn0 gw ready")
if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry, nil); err != nil {
status := corev1.ConditionFalse
reason := "JoinSubnetGatewayReachable"
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry, nil); err != nil {
klog.Errorf("failed to init ovn0 check: %v", err)
return err
status = corev1.ConditionTrue
reason = "JoinSubnetGatewayUnreachable"
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
}
return nil
if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil {
klog.Errorf("failed to set node network unavailable condition: %v", err)
}

return err
}

// If OVS restart, the ovn0 port will down and prevent host to pod network,
Expand All @@ -696,7 +707,31 @@ func (c *Controller) loopOvn0Check() {
}
ip := node.Annotations[util.IPAddressAnnotation]
gw := node.Annotations[util.GatewayAnnotation]
if err := waitNetworkReady(util.NodeNic, ip, gw, false, false, 5, nil); err != nil {
status := corev1.ConditionFalse
reason := "JoinSubnetGatewayReachable"
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
if err = waitNetworkReady(util.NodeNic, ip, gw, false, false, 5, nil); err != nil {
klog.Errorf("failed to init ovn0 check: %v", err)
status = corev1.ConditionTrue
reason = "JoinSubnetGatewayUnreachable"
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
}

var alreadySet bool
for _, condition := range node.Status.Conditions {
if condition.Type == corev1.NodeNetworkUnavailable && condition.Status == corev1.ConditionTrue &&
condition.Reason == reason && condition.Message == message {
alreadySet = true
break
}
}
if !alreadySet {
if err := util.SetNodeNetworkUnavailableCondition(c.config.KubeClient, c.config.NodeName, status, reason, message); err != nil {
klog.Errorf("failed to set node network unavailable condition: %v", err)
}
}

if err != nil {
util.LogFatalAndExit(err, "failed to ping ovn0 gateway %s", gw)
}
}
Expand Down
18 changes: 14 additions & 4 deletions pkg/daemon/ovs_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (

"github.com/Microsoft/hcsshim"
"github.com/containernetworking/plugins/pkg/hns"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"

kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
Expand Down Expand Up @@ -262,7 +264,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool
return nil
}

func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
ipStr := util.GetIPWithoutMask(ip)
raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--",
"set", "interface", util.NodeNic, "type=internal", "--",
Expand Down Expand Up @@ -326,11 +328,19 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd

// ping ovn0 gw to activate the flow
klog.Infof("wait ovn0 gw ready")
if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
status := corev1.ConditionFalse
reason := "JoinSubnetGatewayReachable"
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
klog.Errorf("failed to init ovn0 check: %v", err)
return err
status = corev1.ConditionTrue
reason = "JoinSubnetGatewayUnreachable"
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
}
return nil
if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil {
klog.Errorf("failed to set node network unavailable condition: %v", err)
}
return err
}

// If OVS restart, the ovn0 port will down and prevent host to pod network,
Expand Down
31 changes: 31 additions & 0 deletions pkg/util/k8s.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package util

import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"net"
Expand All @@ -15,6 +17,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

Expand Down Expand Up @@ -169,3 +172,31 @@ func DeploymentIsReady(deployment *appsv1.Deployment) bool {
}
return true
}

// SetNodeNetworkUnavailableCondition writes a NetworkUnavailable condition with
// the given status, reason and message to the node named nodeName.
//
// The condition is serialized as a {"status": {"conditions": [...]}} JSON
// document and submitted through the clientset's Nodes().PatchStatus call.
// LastTransitionTime and LastHeartbeatTime are both set to the time of the
// call, regardless of what the node currently reports.
//
// Any marshal or patch failure is logged and returned to the caller unchanged.
func SetNodeNetworkUnavailableCondition(cs kubernetes.Interface, nodeName string, status v1.ConditionStatus, reason, message string) error {
	timestamp := metav1.NewTime(time.Now())
	condition := v1.NodeCondition{
		Type:               v1.NodeNetworkUnavailable,
		Status:             status,
		Reason:             reason,
		Message:            message,
		LastTransitionTime: timestamp,
		LastHeartbeatTime:  timestamp,
	}

	// Wrap the single condition in the status/conditions envelope expected by the patch.
	payload, err := json.Marshal(map[string]map[string][]v1.NodeCondition{
		"status": {
			"conditions": []v1.NodeCondition{condition},
		},
	})
	if err != nil {
		klog.Errorf("failed to marshal patch data: %v", err)
		return err
	}

	_, err = cs.CoreV1().Nodes().PatchStatus(context.Background(), nodeName, payload)
	if err != nil {
		klog.Errorf("failed to patch node %s: %v", nodeName, err)
		return err
	}

	return nil
}

0 comments on commit e250afa

Please sign in to comment.