Merge pull request #1383 from cliveseldon/1378_ambassador_params

Update Ambassador retries and restructure tests for Ambassador
SeldonIO · Mar 3, 2020 · e8eaa12 · e8eaa12
2 parents 14c1b09 + 72df49f
commit e8eaa12
Show file tree

Hide file tree

Showing 3 changed files with 107 additions and 418 deletions.
diff --git a/doc/source/ingress/ambassador.md b/doc/source/ingress/ambassador.md
@@ -26,7 +26,26 @@ Assuming a Seldon Deployment ```mymodel``` with Ambassador exposed on `0.0.0.0:8
 curl -v 0.0.0.0:8003/seldon/mymodel/api/v1.0/predictions -d '{"data":{"names":["a","b"],"tensor":{"shape":[2,2],"values":[0,0,1,1]}}}' -H "Content-Type: application/json"
 ```
 
-## Canary Deployments
+## Ambassador Configuration Annotations Reference
+
+| Annotation | Description |
+|------------|-------------|
+|`seldon.io/ambassador-config:<configuration>`| Custom Ambassador Configuration |
+|`seldon.io/ambassador-header:<header>`| The header to add to Ambassador configuration |
+|`seldon.io/ambassador-id:<instance id>`| The instance id to be added to Ambassador `ambassador_id` configuration |
+|`seldon.io/ambassador-regex-header:<regex>`| The regular expression header to use for routing via headers|
+|`seldon.io/ambassador-retries:<number of retries>` | The number of times Ambassador will retry a request on connect-failure. Default 0. Use custom configuration if more control is needed.|
+|`seldon.io/ambassador-service-name:<existing_deployment_name>`| The name of the existing Seldon Deployment for shadow or header based routing |
+|`seldon.io/ambassador-shadow:true` | Activate shadowing for this deployment |
+|`seldon.io/grpc-read-timeout:<gRPC read timeout (msecs)>` | gRPC read timeout |
+|`seldon.io/rest-read-timeout:<REST read timeout (msecs)>` | REST read timeout |
+
+All annotations should be placed in `spec.annotations`.
+
+See below for details.
+
+
+### Canary Deployments
 
 Canary rollouts are available where you wish to push a certain percentage of traffic to a new model to test whether it works ok in production. To add a canary to your SeldonDeployment simply add a new predictor section and set the traffic levels for the main and canary to desired levels. For example:
 
@@ -73,7 +92,7 @@ The above example has a "main" predictor with 75% of traffic and a "canary" with
 
 A worked example for [canary deployments](../examples/ambassador_canary.html) is provided.
 
-## Shadow Deployments
+### Shadow Deployments
 
 Shadow deployments allow you to send duplicate requests to a parallel deployment but throw away the response. This allows you to test machine learning models under load and compare the results to the live deployment. 
 
@@ -83,15 +102,17 @@ A worked example for [shadow deployments](../examples/ambassador_shadow.html) is
 
 To understand more about the Ambassador configuration for this see [their docs on shadow deployments](https://www.getambassador.io/reference/shadowing/).
 
-## Header based Routing
+### Header based Routing
 
 Header based routing allows you to route requests to particular Seldon Deployments based on headers in the incoming requests.
 
 You simply need to add some annotations to your Seldon Deployment resource.
 
   * `seldon.io/ambassador-header:<header>` : The header to add to Ambassador configuration	    
      * Example:  `"seldon.io/ambassador-header":"location: london"	    `
-  * `seldon.io/ambassador-service-name:<existing_deployment_name>` : The name of the existing Seldon you want to attach to as an alternative mapping for requests. 
+  * `seldon.io/ambassador-regex-header:<header>` : The regular expression header to add to Ambassador configuration	    
+     * Example:  `"seldon.io/ambassador-regex-header":"location: lond.*"`
+  * `seldon.io/ambassador-service-name:<existing_deployment_name>` : The name of the existing Seldon Deployment you want to attach to as an alternative mapping for requests. 
      * Example: `"seldon.io/ambassador-service-name":"example"`
 
 A worked example for [header based routing](../examples/ambassador_headers.html) is provided.
@@ -118,9 +139,10 @@ spec:
 ```
 
 Note that your Ambassador instance must be configured with matching `ambassador_id`.
+
 See [AMBASSADOR_ID](https://github.com/datawire/ambassador/blob/master/docs/reference/running.md#ambassador_id) for details
 
-## Custom Amabassador configuration
+### Custom Ambassador configuration
 
 The configurations discussed above should cover most cases, but there may be a case where you want a very particular Ambassador configuration under your control. You can achieve this by adding your configuration as an annotation to your Seldon Deployment resource.
 
@@ -129,3 +151,5 @@ The above discussed configurations should cover most cases but there maybe a cas
 
 A worked example for [custom Ambassador config](../examples/ambassador_custom.html) is provided.
 
+
+
diff --git a/operator/controllers/ambassador.go b/operator/controllers/ambassador.go
@@ -17,10 +17,12 @@ const (
 	ANNOTATION_AMBASSADOR_HEADER       = "seldon.io/ambassador-header"
 	ANNOTATION_AMBASSADOR_REGEX_HEADER = "seldon.io/ambassador-regex-header"
 	ANNOTATION_AMBASSADOR_ID           = "seldon.io/ambassador-id"
+	ANNOTATION_AMBASSADOR_RETRIES      = "seldon.io/ambassador-retries"
 
 	YAML_SEP = "---\n"
 
-	AMBASSADOR_IDLE_TIMEOUT = 300000
+	AMBASSADOR_IDLE_TIMEOUT    = 300000
+	AMBASSADOR_DEFAULT_RETRIES = "0"
 )
 
 // Struct for Ambassador configuration
@@ -67,7 +69,12 @@ func getAmbassadorRestConfig(mlDep *machinelearningv1.SeldonDeployment,
 	// Set timeout
 	timeout, err := strconv.Atoi(getAnnotation(mlDep, ANNOTATION_REST_TIMEOUT, "3000"))
 	if err != nil {
-		return "", nil
+		return "", err
+	}
+
+	retries, err := strconv.Atoi(getAnnotation(mlDep, ANNOTATION_AMBASSADOR_RETRIES, AMBASSADOR_DEFAULT_RETRIES))
+	if err != nil {
+		return "", err
 	}
 
 	name := p.Name
@@ -84,10 +91,13 @@ func getAmbassadorRestConfig(mlDep *machinelearningv1.SeldonDeployment,
 		Rewrite:    "/",
 		Service:    serviceName + "." + namespace + ":" + strconv.Itoa(engine_http_port),
 		TimeoutMs:  timeout,
-		RetryPolicy: &AmbassadorRetryPolicy{
+	}
+
+	if retries != 0 {
+		c.RetryPolicy = &AmbassadorRetryPolicy{
 			RetryOn:    "connect-failure",
-			NumRetries: 3,
-		},
+			NumRetries: retries,
+		}
 	}
 
 	if weight != nil {
@@ -158,6 +168,11 @@ func getAmbassadorGrpcConfig(mlDep *machinelearningv1.SeldonDeployment,
 		return "", nil
 	}
 
+	retries, err := strconv.Atoi(getAnnotation(mlDep, ANNOTATION_AMBASSADOR_RETRIES, AMBASSADOR_DEFAULT_RETRIES))
+	if err != nil {
+		return "", err
+	}
+
 	name := p.Name
 	if nameOverride != "" {
 		name = nameOverride
@@ -175,10 +190,13 @@ func getAmbassadorGrpcConfig(mlDep *machinelearningv1.SeldonDeployment,
 		Headers:     map[string]string{"seldon": serviceNameExternal},
 		Service:     serviceName + "." + namespace + ":" + strconv.Itoa(engine_grpc_port),
 		TimeoutMs:   timeout,
-		RetryPolicy: &AmbassadorRetryPolicy{
+	}
+
+	if retries != 0 {
+		c.RetryPolicy = &AmbassadorRetryPolicy{
 			RetryOn:    "connect-failure",
-			NumRetries: 3,
-		},
+			NumRetries: retries,
+		}
 	}
 
 	if weight != nil {