Skip to content

Commit

Permalink
EDGECLOUD-5230: Envoy crashing for UDP packet size larger than 1500 (#…
Browse files Browse the repository at this point in the history
…1443)

* add debug cmd + use envoy version 1.18.x + update envoy yaml to use v3 api
  • Loading branch information
ashxjain authored Aug 9, 2021
1 parent 459dba2 commit 6019015
Show file tree
Hide file tree
Showing 15 changed files with 254 additions and 67 deletions.
4 changes: 4 additions & 0 deletions cloud-resource-manager/crmutil/controller-data.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ func NewControllerData(pf platform.Platform, key *edgeproto.CloudletKey, nodeMgr
cd.updateVMWorkers.Init("vmpool-updatevm", cd.UpdateVMPool)
cd.updateTrustPolicyKeyworkers.Init("update-TrustPolicy", cd.UpdateTrustPolicy)
cd.settings = *edgeproto.GetDefaultSettings()

// debug functions
nodeMgr.Debug.AddDebugFunc("envoyversioncmd", cd.GetClusterEnvoyVersion)

return cd
}

Expand Down
72 changes: 72 additions & 0 deletions cloud-resource-manager/crmutil/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,78 @@ func (s *ExecReqHandler) RecvExecRequest(ctx context.Context, msg *edgeproto.Exe
}()
}

// EnvoyContainerVersion is the per-container result of an envoy version
// query.
type EnvoyContainerVersion struct {
	// ContainerName is the docker container that was queried, EnvoyVersion
	// is the trimmed output of the version command run inside it, and Error
	// carries the failure text when that command could not be run (in which
	// case EnvoyVersion is left empty).
	ContainerName, EnvoyVersion, Error string
}

// RootLBEnvoyVersion collects the envoy version results for all envoy
// containers found on a single node.
type RootLBEnvoyVersion struct {
	// NodeType and NodeName are copied from the node that was inspected.
	NodeType, NodeName string
	// EnvoyContainers has one entry per envoy container seen on the node.
	EnvoyContainers []EnvoyContainerVersion
}

// GetClusterEnvoyVersion is a debug command handler that reports the envoy
// version run by every envoy container on the rootlb nodes.
//
// It snapshots the cluster instances held in the cache, asks the platform for
// the matching management nodes, and for every node whose type contains
// "rootlb" lists the docker containers whose name starts with "envoy" and
// runs `envoy --version` inside each of them. The req argument is not used.
//
// On success the return value is a JSON-encoded []RootLBEnvoyVersion. A
// failure to query one container is recorded in that container's Error field
// and does not stop the run; any other failure aborts the run and is returned
// as a plain-text message instead of JSON.
func (cd *ControllerData) GetClusterEnvoyVersion(ctx context.Context, req *edgeproto.DebugRequest) string {
	// Copy the cluster instances out of the cache so the cache lock is
	// released before any remote command is run.
	clusterInsts := []edgeproto.ClusterInst{}
	cd.ClusterInstCache.Mux.Lock()
	for _, v := range cd.ClusterInstCache.Objs {
		clusterInsts = append(clusterInsts, *v.Obj)
	}
	cd.ClusterInstCache.Mux.Unlock()

	nodes, err := cd.platform.ListCloudletMgmtNodes(ctx, clusterInsts, nil)
	if err != nil {
		return fmt.Sprintf("unable to get list of cluster nodes, %v", err)
	}
	if len(nodes) == 0 {
		return "no nodes found"
	}

	// Start from an empty (non-nil) slice so that "no rootlb nodes" is
	// encoded as [] rather than null.
	nodeVersions := []RootLBEnvoyVersion{}
	for _, node := range nodes {
		if !strings.Contains(node.Type, "rootlb") {
			continue
		}
		client, err := cd.platform.GetNodePlatformClient(ctx, &node)
		if err != nil {
			return fmt.Sprintf("failed to get ssh client for node %s, %v", node.Name, err)
		}
		names, err := client.Output(`docker ps --format "{{.Names}}" --filter name="^envoy"`)
		if err != nil {
			log.SpanLog(ctx, log.DebugLevelInfra, "failed to find envoy containers on rootlb", "rootlb", node, "err", err, "out", names)
			return fmt.Sprintf("failed to find envoy containers on rootlb %s, %v", node.Name, err)
		}
		nodeVersion := RootLBEnvoyVersion{
			NodeType: node.Type,
			NodeName: node.Name,
		}
		for _, name := range strings.Split(names, "\n") {
			name = strings.TrimSpace(name)
			if name == "" {
				continue
			}
			envoyContainerVers := EnvoyContainerVersion{
				ContainerName: name,
			}
			// The container name must be substituted into the command. No
			// TTY is requested (-it): the output is only captured, and
			// docker refuses -t when stdin is not a terminal.
			verOut, err := client.Output(fmt.Sprintf("docker exec %s envoy --version", name))
			if err != nil {
				log.SpanLog(ctx, log.DebugLevelInfra, "failed to find envoy container version on rootlb", "rootlb", node, "container", name, "err", err, "out", verOut)
				envoyContainerVers.Error = err.Error()
			} else {
				envoyContainerVers.EnvoyVersion = strings.TrimSpace(verOut)
			}
			nodeVersion.EnvoyContainers = append(nodeVersion.EnvoyContainers, envoyContainerVers)
		}
		nodeVersions = append(nodeVersions, nodeVersion)
	}

	out, err := json.Marshal(nodeVersions)
	if err != nil {
		return fmt.Sprintf("Failed to marshal node versions: %s, %v", string(out), err)
	}
	return string(out)
}

func (cd *ControllerData) ProcessExecReq(ctx context.Context, req *edgeproto.ExecRequest) (reterr error) {
var err error

Expand Down
29 changes: 23 additions & 6 deletions cloud-resource-manager/proxy/envoy.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ func createEnvoyYaml(ctx context.Context, client ssh.Client, yamldir, name, list
ListenIP: listenIP,
BackendIP: backendIP,
BackendPort: internalPort,
MaxPktSize: p.MaxPktSize,
}
udpconns, err := getUDPConcurrentConnections()
if err != nil {
Expand Down Expand Up @@ -250,12 +251,14 @@ static_resources:
filter_chains:
- filters:
- name: envoy.filters.network.tcp_proxy
config:
typed_config:
'@type': type.googleapis.com/envoy.extensions.filters.network.tcp_proxy.v3.TcpProxy
stat_prefix: ingress_tcp
cluster: backend{{.BackendPort}}
access_log:
- name: envoy.access_loggers.file
config:
typed_config:
'@type': type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: /tmp/access.log
json_format: {
"start_time": "%START_TIME%",
Expand Down Expand Up @@ -283,12 +286,21 @@ static_resources:
protocol: UDP
address: {{.ListenIP}}
port_value: {{.ListenPort}}
{{if ne .MaxPktSize 0 -}}
udp_listener_config:
downstream_socket_config:
max_rx_datagram_size: {{.MaxPktSize}}
{{- end}}
listener_filters:
name: envoy.filters.udp_listener.udp_proxy
typed_config:
'@type': type.googleapis.com/envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig
stat_prefix: downstream{{.BackendPort}}
cluster: udp_backend{{.BackendPort}}
{{if ne .MaxPktSize 0 -}}
upstream_socket_config:
max_rx_datagram_size: {{.MaxPktSize}}
{{- end}}
reuse_port: true
{{- end}}
clusters:
Expand All @@ -300,10 +312,15 @@ static_resources:
thresholds:
max_connections: {{.ConcurrentConns}}
lb_policy: round_robin
hosts:
- socket_address:
address: {{.BackendIP}}
port_value: {{.BackendPort}}
load_assignment:
cluster_name: backend{{.BackendPort}}
endpoints:
lb_endpoints:
- endpoint:
address:
socket_address:
address: {{.BackendIP}}
port_value: {{.BackendPort}}
{{if .HealthCheck -}}
health_checks:
- timeout: 1s
Expand Down
1 change: 1 addition & 0 deletions cloud-resource-manager/proxy/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ type UDPSpecDetail struct {
BackendIP string
BackendPort int32
ConcurrentConns uint64
MaxPktSize int64
}

var nginxConf = `
Expand Down
2 changes: 1 addition & 1 deletion cloudcommon/names.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const MaxClusterNameLength = 40

// Common cert name. Cannot use common name as filename since envoy doesn't know if the app is dedicated or not
const CertName = "envoyTlsCerts"
const EnvoyImageDigest = "sha256:9bc06553ad6add6bfef1d8a1b04f09721415975e2507da0a2d5b914c066474df"
const EnvoyImageDigest = "sha256:2b07bb8dd35c2a4bb273652b62e85b0bd27d12da94fa11061a9c365d4352e7f9"

// PlatformApps is the set of all special "platform" developers. Key
// is DeveloperName:AppName. Currently only Samsung's Enabling layer is included.
Expand Down
144 changes: 90 additions & 54 deletions d-match-engine/dme-proto/appcommon.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docker_envoy/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FROM envoyproxy/envoy:v1.15-latest
FROM envoyproxy/envoy:v1.18-latest
RUN apt-get --assume-yes update && apt-get --assume-yes install curl
ENV ENVOY_UID=0 ENVOY_GID=0
Loading

0 comments on commit 6019015

Please sign in to comment.