-
Notifications
You must be signed in to change notification settings - Fork 457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add BGP capabilities to the NAT GW #4285
Changes from all commits
bf75fc4
221d51b
eebe739
a3719d9
d847183
eda5e2f
0499362
5cbd510
e59763c
561897d
3576338
66a6095
f6fbee5
4448dc9
2b84143
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ import ( | |
"errors" | ||
"fmt" | ||
"maps" | ||
"os" | ||
"reflect" | ||
"regexp" | ||
"slices" | ||
|
@@ -735,6 +736,52 @@ func (c *Controller) execNatGwRules(pod *corev1.Pod, operation string, rules []s | |
return nil | ||
} | ||
|
||
func (c *Controller) setNatGwInterface(annotations map[string]string, externalNetwork string, defaultSubnet *kubeovnv1.Subnet) error { | ||
if vpcNatAPINadName == "" { | ||
return errors.New("no NetworkAttachmentDefinition provided to access apiserver, check configmap ovn-vpc-nat-config and field 'apiNadName'") | ||
} | ||
|
||
nad := fmt.Sprintf("%s/%s, %s/%s", c.config.PodNamespace, externalNetwork, corev1.NamespaceDefault, vpcNatAPINadName) | ||
annotations[util.AttachmentNetworkAnnotation] = nad | ||
|
||
return setNatGwRoute(annotations, defaultSubnet.Spec.Gateway) | ||
} | ||
|
||
func setNatGwRoute(annotations map[string]string, subnetGw string) error { | ||
dst := os.Getenv("KUBERNETES_SERVICE_HOST") | ||
|
||
protocol := util.CheckProtocol(dst) | ||
if !strings.ContainsRune(dst, '/') { | ||
switch protocol { | ||
case kubeovnv1.ProtocolIPv4: | ||
dst = fmt.Sprintf("%s/32", dst) | ||
case kubeovnv1.ProtocolIPv6: | ||
dst = fmt.Sprintf("%s/128", dst) | ||
} | ||
} | ||
|
||
// Check the API NetworkAttachmentDefinition exists, otherwise we won't be able to attach | ||
// the BGP speaker to a network that has access to the K8S apiserver (and won't be able to detect EIPs) | ||
if vpcNatAPINadProvider == "" { | ||
return errors.New("no NetworkAttachmentDefinition provided to access apiserver, check configmap ovn-vpc-nat-config and field 'apiNadName'") | ||
} | ||
|
||
for _, gw := range strings.Split(subnetGw, ",") { | ||
if util.CheckProtocol(gw) == protocol { | ||
routes := []request.Route{{Destination: dst, Gateway: gw}} | ||
buf, err := json.Marshal(routes) | ||
if err != nil { | ||
return fmt.Errorf("failed to marshal routes %+v: %w", routes, err) | ||
} | ||
|
||
annotations[fmt.Sprintf(util.RoutesAnnotationTemplate, vpcNatAPINadProvider)] = string(buf) | ||
break | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet) (*v1.StatefulSet, error) { | ||
annotations := make(map[string]string, 7) | ||
if oldSts != nil && len(oldSts.Annotations) != 0 { | ||
|
@@ -747,6 +794,18 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1 | |
util.LogicalSwitchAnnotation: gw.Spec.Subnet, | ||
util.IPAddressAnnotation: gw.Spec.LanIP, | ||
} | ||
|
||
if gw.Spec.BgpSpeaker.Enabled { // Add an interface that can reach the API server | ||
defaultSubnet, err := c.subnetsLister.Get(c.config.DefaultLogicalSwitch) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we have introduced configMap option There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should use function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The Kubernetes apiserver runs in the default subnet doesn't it? The NAD just happens to be connected to that subnet. Do you want me to:
That can be an option There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
K8s apiserver runs in control plane nodes with host network. It can be accessed from subnets in the default vpc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The NAD can be pointed to ANY subnet which is running in the default vpc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, should kube-ovn have a default NAD installed in the default VPC for kube-dns and the NAT GW? That would be extremely handy. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is an example I'm using: apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: ovn-nad
namespace: default
spec:
config: '{
"cniVersion": "0.3.0",
"type": "kube-ovn",
"server_socket": "/run/openvswitch/kube-ovn-daemon.sock",
"provider": "ovn-nad.default.ovn"
}'
---
apiVersion: kubeovn.io/v1
kind: Subnet
metadata:
name: vpc-apiserver-subnet
spec:
protocol: IPv4
cidrBlock: 100.100.100.0/24
provider: ovn-nad.default.ovn There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
NAD resources can be created only when the CRD is installed in the cluster, while the CRD should be installed by users. |
||
if err != nil { | ||
return nil, fmt.Errorf("failed to get default subnet %s: %w", c.config.DefaultLogicalSwitch, err) | ||
} | ||
|
||
if err := c.setNatGwInterface(podAnnotations, nadName, defaultSubnet); err != nil { | ||
return nil, err | ||
} | ||
} | ||
|
||
for key, value := range podAnnotations { | ||
annotations[key] = value | ||
} | ||
|
@@ -782,6 +841,7 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1 | |
routes = append(routes, request.Route{Destination: cidrV6, Gateway: v6Gateway}) | ||
} | ||
} | ||
|
||
if err = setPodRoutesAnnotation(annotations, util.OvnProvider, routes); err != nil { | ||
klog.Error(err) | ||
return nil, err | ||
|
@@ -820,6 +880,7 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1 | |
"app": name, | ||
util.VpcNatGatewayLabel: "true", | ||
} | ||
|
||
sts := &v1.StatefulSet{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: name, | ||
|
@@ -859,6 +920,106 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1 | |
}, | ||
}, | ||
} | ||
|
||
// BGP speaker for GWs must be enabled globally and for this specific instance | ||
if gw.Spec.BgpSpeaker.Enabled { | ||
containers := sts.Spec.Template.Spec.Containers | ||
|
||
// We need a speaker image configured in the NAT GW ConfigMap | ||
if vpcNatGwBgpSpeakerImage == "" { | ||
return nil, fmt.Errorf("%s should have bgp speaker image field if bgp enabled", util.VpcNatConfig) | ||
} | ||
|
||
args := []string{ | ||
"--nat-gw-mode", // Force to run in NAT GW mode, we're not announcing Pod IPs or Services, only EIPs | ||
} | ||
|
||
speakerParams := gw.Spec.BgpSpeaker | ||
|
||
if speakerParams.RouterID != "" { // Override default auto-selected RouterID | ||
args = append(args, fmt.Sprintf("--router-id=%s", speakerParams.RouterID)) | ||
} | ||
|
||
if speakerParams.Password != "" { // Password for TCP MD5 BGP | ||
args = append(args, fmt.Sprintf("--auth-password=%s", speakerParams.Password)) | ||
} | ||
|
||
if speakerParams.EnableGracefulRestart { // Enable graceful restart | ||
args = append(args, "--graceful-restart") | ||
} | ||
|
||
if speakerParams.HoldTime != (metav1.Duration{}) { // Hold time | ||
args = append(args, fmt.Sprintf("--holdtime=%s", speakerParams.HoldTime.Duration.String())) | ||
} | ||
|
||
if speakerParams.ASN == 0 { // The ASN we use to speak | ||
return nil, errors.New("ASN not set, but must be non-zero value") | ||
} | ||
|
||
if speakerParams.RemoteASN == 0 { // The ASN we speak to | ||
return nil, errors.New("remote ASN not set, but must be non-zero value") | ||
} | ||
|
||
args = append(args, fmt.Sprintf("--cluster-as=%d", speakerParams.ASN)) | ||
args = append(args, fmt.Sprintf("--neighbor-as=%d", speakerParams.RemoteASN)) | ||
|
||
if len(speakerParams.Neighbors) == 0 { | ||
return nil, errors.New("no BGP neighbors specified") | ||
} | ||
|
||
var neighIPv4 []string | ||
var neighIPv6 []string | ||
for _, neighbor := range speakerParams.Neighbors { | ||
switch util.CheckProtocol(neighbor) { | ||
case kubeovnv1.ProtocolIPv4: | ||
neighIPv4 = append(neighIPv4, neighbor) | ||
case kubeovnv1.ProtocolIPv6: | ||
neighIPv6 = append(neighIPv6, neighbor) | ||
} | ||
} | ||
|
||
argNeighIPv4 := strings.Join(neighIPv4, ",") | ||
argNeighIPv6 := strings.Join(neighIPv6, ",") | ||
argNeighIPv4 = fmt.Sprintf("--neighbor-address=%s", argNeighIPv4) | ||
argNeighIPv6 = fmt.Sprintf("--neighbor-ipv6-address=%s", argNeighIPv6) | ||
|
||
if len(neighIPv4) > 0 { | ||
args = append(args, argNeighIPv4) | ||
} | ||
|
||
if len(neighIPv6) > 0 { | ||
args = append(args, argNeighIPv6) | ||
} | ||
|
||
// Extra args to start the speaker with, for example, logging levels... | ||
args = append(args, speakerParams.ExtraArgs...) | ||
|
||
sts.Spec.Template.Spec.ServiceAccountName = "vpc-nat-gw" | ||
speakerContainer := corev1.Container{ | ||
Name: "vpc-nat-gw-speaker", | ||
Image: vpcNatGwBgpSpeakerImage, | ||
Command: []string{"/kube-ovn/kube-ovn-speaker"}, | ||
ImagePullPolicy: corev1.PullIfNotPresent, | ||
Env: []corev1.EnvVar{ | ||
{ | ||
Name: util.GatewayNameEnv, | ||
Value: gw.Name, | ||
}, | ||
{ | ||
Name: "POD_IP", | ||
ValueFrom: &corev1.EnvVarSource{ | ||
FieldRef: &corev1.ObjectFieldSelector{ | ||
FieldPath: "status.podIP", | ||
}, | ||
}, | ||
}, | ||
}, | ||
Args: args, | ||
} | ||
|
||
sts.Spec.Template.Spec.Containers = append(containers, speakerContainer) | ||
} | ||
|
||
return sts, nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the NAD namespace is always `default`, there is no need to set both `apiNadName` and `apiNadProvider` in the configMap, since `<apiNadProvider>` equals `<apiNadName>.default.ovn` or `<apiNadName>.default`.
How about getting the NAD name and namespace from the NAD provider?