Skip to content

Commit

Permalink
1.10.2 changes (#1830)
Browse files Browse the repository at this point in the history
* VlanID changes

PPSG Test agent changes

CNI metrics helper changes

minor fix

Move logging for CLUSTER_ID and Region inside publisher.go from main.go

revert manifest changes as the image is not released yet

* Cherry pick multi-arch changes in release branch

* cni-metrics-helper changes
vlanID changes
disable network provisioning fix
ipamd error code fix

* Minor change

* remove redundant changes

* Go version changes

* Switch to public ECR for AL2 (#1804)

* Switch to public ecr for al2 image

- Removed docker_arch variable which is redundant with this change

* Fix makefile and dockerfile entries

* Merge changes to auto-sync manifests

* minor change: add ContainerID to dummyVlanInterface

* Remove sudo from workflow files (#1818)

* remove set -x from bash, add -Ss to curl (#1802)

* remove weekly-cron-test.yml as its not needed for this branch

Co-authored-by: Vikas Basavaraj <[email protected]>
Co-authored-by: Jayanth Varavani <[email protected]>
Co-authored-by: Scott Merrill <[email protected]>
  • Loading branch information
4 people authored Feb 3, 2022
1 parent 02b61d6 commit 6b269a3
Show file tree
Hide file tree
Showing 32 changed files with 413 additions and 264 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build-multi-arch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
runs-on: ubuntu-latest
steps:

- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

- name: Check out code into the Go module directory
Expand All @@ -32,10 +32,10 @@ jobs:
runs-on: [self-hosted, linux, arm64]
steps:

- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

- name: Check out code into the Go module directory
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cron-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
integration-cron:
runs-on: [self-hosted, linux, x64]
steps:
- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

- name: Check out code into the Go module directory
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/forked-pr-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ jobs:
if:
github.event.inputs.pull_request_number != ''
steps:
- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

# Check out a merge commit
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ jobs:
integration-trusted:
runs-on: [self-hosted, linux, x64]
steps:
- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

- name: Check out code into the Go module directory
Expand All @@ -29,7 +29,7 @@ jobs:
go install golang.org/x/tools/cmd/goimports@latest
- name: Clean up stale docker images
run: sudo docker image prune -f
run: docker image prune -f

- name: Run e2e tests
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: VPC CNI Release
on: [push, workflow_dispatch]

env:
DEFAULT_GO_VERSION: ^1.15
DEFAULT_GO_VERSION: ^1.17
GITHUB_USERNAME: ${{ secrets.EKS_BOT_GITHUB_USERNAME }}
GITHUB_TOKEN: ${{ secrets.EKS_BOT_GITHUB_TOKEN }}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
runs-on: ubuntu-latest
steps:

- name: Set up Go 1.16
- name: Set up Go 1.17
uses: actions/setup-go@v2
with:
go-version: '1.16'
go-version: '1.17'
id: go

- name: Check out code into the Go module directory
Expand Down
8 changes: 2 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ HELM_CHART_NAME ?= "aws-vpc-cni"
# TEST_IMAGE is the testing environment container image.
TEST_IMAGE = amazon-k8s-cni-test
TEST_IMAGE_NAME = $(TEST_IMAGE)$(IMAGE_ARCH_SUFFIX):$(VERSION)
# These values derive ARCH and DOCKER_ARCH which are needed by dependencies in
# These values derive ARCH which is needed by dependencies in
# image build defaulting to system's architecture when not specified.
#
# UNAME_ARCH is the runtime architecture of the building host.
Expand All @@ -52,16 +52,13 @@ UNAME_ARCH = $(shell uname -m)
#
# These are pairs of input_arch to derived_arch separated by colons:
ARCH = $(lastword $(subst :, ,$(filter $(UNAME_ARCH):%,x86_64:amd64 aarch64:arm64)))
# DOCKER_ARCH is the docker specific architecture specifier used for building on
# multiarch container images.
DOCKER_ARCH = $(lastword $(subst :, ,$(filter $(ARCH):%,amd64:amd64 arm64:arm64v8)))
# IMAGE_ARCH_SUFFIX is the `-arch` suffix included in the container image name.
#
# This is only applied to the arm64 container image by default. Override to
# provide an alternate suffix or to omit.
IMAGE_ARCH_SUFFIX = $(addprefix -,$(filter $(ARCH),arm64))
# GOLANG_IMAGE is the building golang container image used.
GOLANG_IMAGE = golang:1.16-stretch
GOLANG_IMAGE = public.ecr.aws/docker/library/golang:1.17-stretch
# For the requested build, these are the set of Go specific build environment variables.
export GOARCH ?= $(ARCH)
export GOOS = linux
Expand Down Expand Up @@ -96,7 +93,6 @@ DOCKER_RUN_FLAGS = --rm -ti $(DOCKER_ARGS)
# DOCKER_BUILD_FLAGS is the set of flags passed during container image builds
# based on the requested build.
DOCKER_BUILD_FLAGS = --build-arg GOARCH="$(ARCH)" \
--build-arg docker_arch="$(DOCKER_ARCH)" \
--build-arg golang_image="$(GOLANG_IMAGE)" \
--network=host \
$(DOCKER_ARGS)
Expand Down
1 change: 1 addition & 0 deletions charts/cni-metrics-helper/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ image:

env:
USE_CLOUDWATCH: "true"
AWS_CLUSTER_ID: ""

fullnameOverride: "cni-metrics-helper"

Expand Down
96 changes: 96 additions & 0 deletions cmd/cni-metrics-helper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,102 @@ The following diagram shows how `cni-metrics-helper` works in a cluster:

![](../../docs/images/cni-metrics-helper.png)

### Using IRSA
As per [AWS EKS Security Best Practice](https://docs.aws.amazon.com/eks/latest/userguide/best-practices-security.html), if you are using IRSA for pods then following requirements must be satisfied to succesfully publish metrics to CloudWatch

1. The IAM Role for your SA [(IRSA)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) must have following policy attached

```
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"cloudwatch:PutMetricData"
],
"Resource": "*"
}
]
}
```

2. You should have similar ClusterRole and ClusterRoleBinding for the IRSA

```
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cni-metrics-helper
rules:
- apiGroups: [""]
resources:
- pods
- pods/proxy
verbs: ["get", "watch", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cni-metrics-helper
labels:
app.kubernetes.io/name: cni-metrics-helper
app.kubernetes.io/instance: cni-metrics-helper
app.kubernetes.io/version: "v1.10.2"
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cni-metrics-helper
subjects:
- kind: ServiceAccount
name: <IRSA name>
namespace: kube-system
```

3. Specify the IRSA name in the cni-metrics-helper deployment spec alongwith the AWS_CLUSTER_ID (as described below). The value that you specify here will show up under the dimension 'CLUSTER_ID' for your published metrics. Specifying value for this field is mandatory only if you are blocking IMDS access

#### `AWS_CLUSTER_ID`

Type: String

Default: `""`

An Identifier for your Cluster which will be used as the dimension for published metrics. Ideally it should be ClusterName or ClusterID.

```
kind: Deployment
apiVersion: apps/v1
metadata:
name: cni-metrics-helper
namespace: kube-system
labels:
k8s-app: cni-metrics-helper
spec:
selector:
matchLabels:
k8s-app: cni-metrics-helper
template:
metadata:
labels:
k8s-app: cni-metrics-helper
spec:
containers:
- env:
- name: USE_CLOUDWATCH
value: "true"
- name: AWS_CLUSTER_ID
value: ""
name: cni-metrics-helper
image: <image>
serviceAccountName: <IRSA name>
```
With IRSA, the above deployment spec will be auto-injected with AWS_REGION parameter and it will be used to fetch Region information when we publish metrics.
Possible Scenarios for above configuration
1. If you are not using IRSA, then Region and CLUSTER_ID information will be fetched using IMDS (should have access)
2. If you are using IRSA but have not specified AWS_CLUSTER_ID, we will fetch the value for CLUSTER_ID if IMDS access is not blocked
3. If you have blocked IMDS access, then you must specify a value for AWS_CLUSTER_ID in the deployment spec
4. If you have not blocked IMDS access but have specified AWS_CLUSTER_ID value, then this value will be used.

### Installing the cni-metrics-helper
```
kubectl apply -f v1.6/cni-metrics-helper.yaml
Expand Down
53 changes: 39 additions & 14 deletions cmd/routed-eni-cni-plugin/cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ import (
)

const ipamdAddress = "127.0.0.1:50051"
const vlanInterfaceName = "vlanId"

const vlanInterfacePrefix = "vlan"
const dummyVlanInterfacePrefix = "dummy"

var version string

Expand Down Expand Up @@ -129,8 +131,7 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap

log.Infof("Received CNI add request: ContainerID(%s) Netns(%s) IfName(%s) Args(%s) Path(%s) argsStdinData(%s)",
args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)

log.Infof("Prev Result: %v\n", conf.PrevResult)
log.Debugf("Prev Result: %v\n", conf.PrevResult)

var k8sArgs K8sArgs
if err := cniTypes.LoadArgs(args.Args, &k8sArgs); err != nil {
Expand Down Expand Up @@ -202,10 +203,24 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
}

var hostVethName string
var dummyVlanInterface *current.Interface

// Non-zero value means pods are using branch ENI
if r.PodVlanId != 0 {
hostVethName = generateHostVethName("vlan", string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
hostVethName = generateHostVethName(vlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupPodENINetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.PodVlanId), r.PodENIMAC,
r.PodENISubnetGW, int(r.ParentIfIndex), mtu, log)

// This is a dummyVlanInterfaceName generated to identify dummyVlanInterface
// which will be created for PPSG scenario to pass along the vlanId information
// as a part of the ADD cmd Result struct
// The podVlanId is used by DEL cmd, fetched from the prevResult struct to cleanup the pod network
dummyVlanInterfaceName := generateHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))

// The dummyVlanInterface is purely virtual and relevent only for ppsg, so we decided to keep it separate
// and not overload the already available hostVethInterface
dummyVlanInterface = &current.Interface{Name: dummyVlanInterfaceName, Mac: fmt.Sprint(r.PodVlanId), Sandbox: args.ContainerID}
log.Debugf("Using dummy vlanInterface: %v", dummyVlanInterface)
} else {
// build hostVethName
// Note: the maximum length for linux interface name is 15
Expand Down Expand Up @@ -249,18 +264,20 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap

hostInterface := &current.Interface{Name: hostVethName}
containerInterface := &current.Interface{Name: args.IfName, Sandbox: args.Netns}
vlanInterface := &current.Interface{Name: vlanInterfaceName, Mac: fmt.Sprint(r.PodVlanId)}
log.Infof("Using vlanInterface: %v", vlanInterface)

result := &current.Result{
IPs: ips,
Interfaces: []*current.Interface{
hostInterface,
containerInterface,
vlanInterface,
},
}

// We append dummyVlanInterface only for pods using branch ENI
if dummyVlanInterface != nil {
result.Interfaces = append(result.Interfaces, dummyVlanInterface)
}

return cniTypes.PrintResult(result, conf.CNIVersion)
}

Expand All @@ -281,7 +298,7 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
driverClient driver.NetworkAPIs) error {

conf, log, err := LoadNetConf(args.StdinData)
log.Infof("Prev Result: %v\n", conf.PrevResult)
log.Debugf("Prev Result: %v\n", conf.PrevResult)

if err != nil {
return errors.Wrap(err, "add cmd: error loading config from args")
Expand All @@ -296,6 +313,10 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return errors.Wrap(err, "del cmd: failed to load k8s config from args")
}

// With containerd as the runtime, it was observed that sometimes spurious delete requests
// are triggered from kubelet with an empty Netns. This check safeguards against such
// scenarios and we just return
// ref: https://github.com/kubernetes/kubernetes/issues/44100#issuecomment-329780382
if args.Netns == "" {
log.Info("Netns() is empty, so network already cleanedup. Nothing to do")
return nil
Expand All @@ -306,18 +327,22 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
// prevResult might not be availabe, if we are still using older cni spec < 0.4.0.
// So we should fallback to the old clean up method
if ok {
dummyVlanInterfaceName := generateHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
for _, iface := range prevResult.Interfaces {
if iface.Name == vlanInterfaceName {
if iface.Name == dummyVlanInterfaceName {
podVlanId, err := strconv.Atoi(iface.Mac)
if err != nil {
return errors.Wrap(err, "Failed to parse vlanId from prevResult")
log.Errorf("Failed to parse vlanId from prevResult: %v", err)
return errors.Wrap(err, "del cmd: failed to parse vlanId from prevResult")
}
// podVlanId == 0 means pod is not using branch ENI
// then fallback to existing cleanup

// podVlanID can not be 0 as we add dummyVlanInterface only for ppsg
// if it is 0 then we should return an error
if podVlanId == 0 {
break
log.Errorf("Found SG pod:%s namespace:%s with 0 vlanID", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE)
return errors.Wrap(err, "del cmd: found Incorrect 0 vlandId for ppsg")
}
// if podVlanId != 0 means pod is using branch ENI

err = cleanUpPodENI(podVlanId, log, args.ContainerID, driverClient)
if err != nil {
return err
Expand Down
Loading

0 comments on commit 6b269a3

Please sign in to comment.