From 1a0ccf0e9f1db62e77d840fde3fc3b18b2b7c976 Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Tue, 7 Mar 2023 16:41:21 -0600 Subject: [PATCH 1/9] add: WIP for smithy orchestrator RFC --- design/src/SUMMARY.md | 1 + .../src/rfcs/rfc0034_smithy_orchestrator.md | 453 ++++++++++++++++++ 2 files changed, 454 insertions(+) create mode 100644 design/src/rfcs/rfc0034_smithy_orchestrator.md diff --git a/design/src/SUMMARY.md b/design/src/SUMMARY.md index ae53de0a47..ae41ea5246 100644 --- a/design/src/SUMMARY.md +++ b/design/src/SUMMARY.md @@ -56,6 +56,7 @@ - [RFC-0031: Providing Fallback Credentials on Timeout](./rfcs/rfc0031_providing_fallback_credentials_on_timeout.md) - [RFC-0032: Better Constraint Violations](./rfcs/rfc0032_better_constraint_violations.md) - [RFC-0033: Improving access to request IDs in SDK clients](./rfcs/rfc0033_improve_sdk_request_id_access.md) + - [RFC-0034: Smithy Orchestrator](./rfcs/rfc0034_smithy_orchestrator.md) - [Contributing](./contributing/overview.md) - [Writing and debugging a low-level feature that relies on HTTP](./contributing/writing_and_debugging_a_low-level_feature_that_relies_on_HTTP.md) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md new file mode 100644 index 0000000000..9296b66839 --- /dev/null +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -0,0 +1,453 @@ +# Smithy Orchestrator + +> status: RFC +> applies-to: The smithy client + +This RFC proposes a new process for constructing client requests and handling service responses. This new process is intended to: + +- Improve the user experience by + - Simplifying several aspects of sending a request + - Adding more extension points to the request/response lifecycle +- Improve the maintainer experience by + - Making our SDK more similar in structure to other AWS SDKs + - Simplifying many aspects of the request/response lifecycle + - Making room for future changes + +Additionally, functionality that the SDKs currently provide like retries, logging, and auth with be incorporated into this new +process in such a way as to make it more configurable and understandable. + +## Terminology + +- **SDK Client**: A high-level abstraction allowing users to make requests to remote services. +- **Remote Service**: A remote API that a user wants to use. Communication with a remote service usually happens over HTTP. The remote service is usually, but not necessarily, an AWS service. +- **Operation**: A high-level abstraction representing an interaction between an *SDK Client and a *remote service*. +- **Input Message**: A modeled request passed into an *SDK client*. For example, S3’s `ListObjectsRequest`. +- **Transport Request Message**: A message that can be transmitted to a *remote service*. For example, an HTTP request. +- **Transport Response Message**: A message that can be received from a *remote service*. For example, an HTTP response. +- **Output Message**: A modeled response or exception returned to an *SDK client* caller. For example, S3’s `ListObjectsResponse` or `NoSuchBucketException`. +- **The request/response lifecycle**: The process by which an *SDK client* makes requests and receives responses from a *remote service*. This process is enacted and managed by the *orchestrator*. +- **Orchestrator**: The code within an *SDK client* that handles the process of making requests and receiving responses from *remote services*. The orchestrator is configurable by modifying the *runtime plugins* it's built from. The orchestrator is responsible for calling *interceptors* at the appropriate times in the *request/response lifecycle*. +- **Interceptor**/**Hook**: A generic extension point within the *orchestrator*. Supports "anything that someone should be able to do", NOT "anything anyone might want to do". These hooks are: + - Either **read-only** or **read/write**. + - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. +- **Runtime Plugin**: A runtime plugin defines instructions for how an *SDK client* is configured to use the components below: + - `RetryStrategy`: Defines how requests are retried. + - `TraceProbes`: Defines locations to which SDK metrics are published. + - `EndpointProviders`: Defines which hostname an SDK will call when making a request. + - `HTTPClients`: Defines how remote services are called. + - `IdentityProviders`: Defines how customers identify themselves to remote services. + - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Defines how customers authenticate themselves to remote services. + - `Checksum Algorithms`: Defines how an SDK calculates request and response checksums. + +## The user experience if this RFC is implemented + +For many users, the changes described by this RFC will be invisible. Making a request with an orchestrator-based SDK client looks very similar to the way requests are made pre-RFC: + +```rust +let sdk_config = aws_config::load_from_env().await; +let client = aws_sdk_s3::Client::new(&sdk_config); +let res = client.get_object() + .bucket("a-bucket") + .key("a-file.txt") + .send() + .await?; + +match res { +Ok(res) => println!("success: {:?}"), +Err(err) => eprintln!("failure: {:?}") +}; +``` + +However, if a user wishes to configure clients and operations, they may do so by setting **runtime plugins**. + +### Configuring clients and operations with runtime plugins + +Runtime plugins construct and modify client configuration. Plugin initialization is the first step of sending a request, and plugins set in later steps can override the actions of earlier plugins. Plugin ordering is deterministic and non-customizable. + +While AWS services define a default set of plugins, users may define their own plugins, and set them by calling the appropriate methods on a service's config, client, or operation: + +```rust +let service_config = aws_sdk_s3::Config::builder() + // Multiple interceptors may be added + .with_interceptor(CustomInterceptor::new()) + // Multiple trace probes may be added + .with_trace_probe(CustomTraceProbe::new()) + // Multiple identity providers may be added + .with_identity_provider(CustomIdProvider::new()) + // Multiple checksum algorithms may be added, but if a checksum algorithm's + // ID matches the ID of an existing algorithm, then the new algorithm will + // overwrite the old one. + .with_checksum_algorithm(CustomChecksumAlgorithm::new()) + // Only one retry strategy may be set. Setting a new one will replace the + // old one. + .with_retry_strategy(CustomRetryStrategy::new()) + // Services also define protocol-specific configuration. As an example, here + // are some HTTP client setting. + .with_http_client(CustomHttpClient::new()) + // + .with_http_auth_scheme(CustomHttpAuthScheme::new()) + .build(); + +// TODO HTTP client configuration is called out as separate in the internal design docs. How should it be configured? + +// Plugins can be set on clients. +let client = aws_sdk_s3::Client::builder() + .config(&sdk_config) + .with_plugin(OpenTelemetryPlugin::new()) +.build(); + +// Plugins can be set on operations by using the `customize` method. +let res = client.get_object() + .bucket("a-bucket") + .key("a-file.txt") + .customize(|mut op| { + // Check to see if the `SpecialOperationLogger` was already set. + // If no, then set it and add a special header before returning + // the modified operation. + if !op.plugins().contains(SpecialOperationLogger::id()) { + // set_plugin is just like `with_plugin` except it takes a + // `&mut self` so that we don't need to re-assign `op`. + op.set_plugin(SpecialOperationLogger::new()); + op.headers_mut().insert("Is-Special", "yes"); + } + + op + }) + .send() + .await?; +``` + +Plugins are specifically meant for constructing service and operation configuration. If a user wants to define behavior that should occur at specific points in the *request/response lifecycle*, then they should instead consider defining an *interceptor*. + +### Configuring interceptors + +Interceptors are similar to middlewares, in that they are functions that can read and modify request and response state. However, they are more restrictive than middlewares in that they can't modify the "control flow" of the request/response lifecycle. This is intentional. Interceptors can be registered on a client or operation, and the orchestrator is responsible for calling interceptors at the appropriate time. Users MUST NOT perform blocking IO within an interceptor. Interceptors are sync, and are not intended to perform large amounts of work. This makes them easier to reason about and use. Depending on when they are called, interceptors may read and modify *input messages*, *transport request messages*, *transport response messages*, and *output messages*. Additionally, all interceptors may write to a context object that is shared between all interceptors. + +#### Currently supported hooks + +1. **Read Before Execution *(Read-Only)***: Before anything happens. This is the first +thing the SDK calls during operation execution. +1. **Modify Before Serialization *(Read/Write)***: Before the input message given by +the customer is marshalled into a transport request message. Allows modifying the +input message. +1. **Read Before Serialization *(Read-Only)***: The last thing the SDK calls before +marshaling the input message into a transport message. +1. **Read After Serialization *(Read-Only)***: The first thing the SDK calls after marshaling the input message into a transport message. +1. *(Retry Loop)* + 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. + 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. + 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. + 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. + 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. + 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. + 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. + 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. + 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. + 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. + 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). + 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. +1. **Modify Before Execution Completion *(Read/Write)***: Before the execution ends. Allows modifying the unmarshaled response (output message or error). +1. **Read After Execution *(Read-Only)***: After everything has happened. This is the last thing the SDK calls during operation execution. + +### Interceptor context + +As mentioned above, interceptors may read/write a context object that is shared between all interceptors: + +```rust +pub struct InterceptorContext { + // a.k.a. the input message + modeled_request: ModReq, + // a.k.a. the transport request message + tx_request: Option, + // a.k.a. the output message + modeled_response: Option, + // a.k.a. the transport response message + tx_response: Option, + // A type-keyed map + properties: SharedPropertyBag, +} +``` + +The optional request and response types in the interceptor context can only be accessed by interceptors that are run after specific points in the *request/response lifecycle*. Rather than go into depth in this RFC, I leave that to a future "Interceptors RFC." + +## How to implement this RFC + +### The `aws-smithy-orchestrator` crate + +*I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* + +```rust +pub struct SmithyConfig { + pub interceptors: Interceptors< + In, + http::Request, + http::Response, + Result + >, + pub response_deserializer: DeserializeResponse, + pub token_bucket: Arc, + pub request_serializer: SerializeRequest, + pub endpoint_resolver: Arc>, + // These still need to be generic either because they're service-specific + // or not object safe. + pub service_config: Cfg, + pub retry_classifier: ClassifyRetry, SdkError>, + pub endpoint_parameters: Ep, +} + +// These two macros help shorten the `make_an_attempt` function and make it +// more readable +macro_rules! bail_unless_ok { + ($ctx: ident, $inp: expr) => { + match $inp { + Ok(ok) => ok, + Err(err) => { + $ctx.set_modeled_response(Err(err)); + return; + } + } + }; +} + +macro_rules! bail_if_err { + ($ctx: ident, $inp: expr) => { + if let Err(err) = $inp { + $ctx.set_modeled_response(Err(err)); + return; + } + }; +} + +/// `In`: The input message e.g. `ListObjectsRequest` +/// `T`: The 'success' output message e.g. `ListObjectsResponse` +/// `E`: The 'failure' output message e.g. `NoSuchBucketException` +/// `Cfg`: The service config +/// `Ep`: An endpoint parameters struct +/// `To`: A token from a token bucket +pub async fn invoke( + input: In, + cfg: &Cfg, + mut connection: Connector, +) -> Result, SdkError> +where + // The input must be Clone in case of retries + In: Clone, + // Because the invoke function is not service-specific, and must be able to + // consume service-specific configuration, it needs to convert that config + // into a more generic form. We must also clone it before applying runtime + // plugins. + Cfg: Clone + ApplyPlugins + Into, + // Errors that occur during the request/response lifecycle must be classifiable + // by the request retryer and the token bucket. + E: ProvideErrorKind, +{ + // Create a new interceptor context. + // This will be passed to each interceptor in turn. + let mut ctx = InterceptorContext::new(req); + // We clone the config and apply all registered plugins to it, before converting + // it into a type we can use. + let cfg = cfg.clone().apply_plugins().into(); + + interceptors.read_before_execution(&ctx)?; + interceptors.modify_before_serialization(&mut ctx)?; + interceptors.read_before_serialization(&ctx)?; + + // We clone the input, serialize it into ""-form, + // and store it in the interceptor context. + let mod_req = ctx.modeled_request().clone(); + let req = request_serializer(mod_req)?; + ctx.set_tx_request(req); + + interceptors.read_after_serialization(&ctx)?; + interceptors.modify_before_retry_loop(&mut ctx)?; + + // Making an HTTP request can fail for several reasons, but we still need to + // call lifecycle events when that happens. Therefore, we define this + // `make_an_attempt` function to make error handling simpler. + async fn make_an_attempt( + ctx: &mut InterceptorContext< + In, + http::Request, + http::Response, + Result, SdkError> + >, + interceptors: &mut Interceptors< + In, + http::Request, + http::Response, + Result + >, + endpoint_resolver: &dyn ResolveEndpoint, + endpoint_parameters: &Ep, + connection: &mut Connector, + response_deserializer: &DeserializeResponse, + ) where + In: Clone, + { + interceptors.read_before_attempt(ctx); + let auth_schemes = bail_unless_ok!( + ctx, + resolve_auth_schemes(endpoint_resolver, endpoint_parameters) + ); + let signer = get_signer_for_first_supported_auth_scheme(&auth_schemes); + let identity = bail_unless_ok!( + ctx, + auth_scheme.resolve_identity() + ); + bail_if_err!( + ctx, + resolve_and_apply_endpoint(ctx, endpoint_resolver, &endpoint_parameters) + ); + + bail_if_err!(ctx, interceptors.modify_before_signing(ctx) ); + // 7.h + bail_if_err!(ctx, interceptors.read_before_signing(ctx)); + { + let (tx_req_mut, props) = ctx.tx_request_mut() + .expect("tx_request has been set"); + if let Err(err) = signer(tx_req_mut, &props) { + drop(props); + ctx.set_modeled_response(Err(err.into())); + return; + } + } + bail_if_err!(ctx, interceptors.read_after_signing(ctx)); + bail_if_err!(ctx, interceptors.modify_before_transmit(ctx)); + bail_if_err!(ctx, interceptors.read_before_transmit(ctx)); + + // The connection consumes the request but we need to keep a copy of it + // within the interceptor context, so we clone it here. + let tx_req = try_clone_http_request( + ctx.tx_request().expect("tx_request has been set") + ).expect("tx_request is cloneable"); + let res = bail_unless_ok!(ctx, connection(tx_req).await); + ctx.set_tx_response(res); + + bail_if_err!(ctx, interceptors.read_after_transmit(ctx)); + bail_if_err!(ctx, interceptors.modify_before_deserialization(ctx)); + bail_if_err!(ctx, interceptors.read_before_deserialization(ctx)); + + let (tx_res, _) = ctx.tx_response_mut().expect("tx_response has been set"); + let res = response_deserializer(tx_res); + ctx.set_modeled_response(res); + + bail_if_err!(ctx, interceptors.read_after_deserialization(&ctx)); + } + + let mut attempt = 0; + loop { + if attempt > config.retry.max_attempts() { + break; + } + + // Acquire initial request token. Some retry/quota strategies don't require a + // token for the initial request. We must always ask for the token, + // but taking it may not affect the number of tokens in the bucket. + let mut token = Some(token_bucket.try_acquire(None).map_err(|err| { + SdkError::dispatch_failure(ConnectorError::other( + Box::new(err), + Some(ErrorKind::ClientError), + )) + })?); + + // For attempt other than the first, we need to clear out data set in previous + // attempts. + if attempt > 0 { + ctx.reset(); + } + + make_an_attempt( + &mut ctx, + &mut interceptors, + &signer, + endpoint_resolver.as_ref(), + &endpoint_parameters, + &mut connection, + &response_deserializer, + ) + .await; + + interceptors.read_after_attempt(&ctx)?; + interceptors.modify_before_attempt_completion(&mut ctx)?; + + // Figure out if the last attempt succeeded or failed + let retry_kind: RetryKind = retry_classifier( + ctx.modeled_response().expect("modeled_response has been set").as_ref(), + ); + match retry_kind { + RetryKind::Explicit(_duration) => { + attempt += 1; + token.take().unwrap().forget(); + continue; + } + RetryKind::Error(_err) => { + attempt += 1; + token.take().unwrap().forget(); + continue; + } + RetryKind::UnretryableFailure => { + token.take().unwrap().forget(); + } + RetryKind::Unnecessary => { + token.take().unwrap().release(); + if attempt == 0 { + // Some token buckets refill if a request succeeds with retrying + token_bucket.refill_on_instant_success(); + } + } + _unhandled => unreachable!("encountered unhandled RetryKind {_unhandled:?}"), + } + + interceptors.modify_before_completion(&mut ctx)?; + // Dispatch logging events to all registered tracing probes + cfg.dispatch_events(); + interceptors.read_after_execution(&ctx)?; + + break; + } + + let ( + modeled_response, + tx_response, + property_bag + ) = ctx.into_responses().map_err(SdkError::interceptor_error)?; + let operation_response = operation::Response::from_parts( + tx_response, + property_bag + ); + + match modeled_response { + Ok(output) => Ok(SdkSuccess { + parsed: output, + raw: operation_response, + }), + Err(err) => Err(SdkError::service_error( + err, + operation_response, + )) + } +} +``` + +### The `aws-smithy-interceptors` crate + +The `aws-smithy-interceptors` crate is a stub in this RFC that contains only a few types necessary to partially implement the orchestrator. The specific design of interceptors is left to a future RFC. + +## F.A.Q. + +- The orchestrator is a large and complex feature, with many moving parts. How can we ensure that multiple people can contribute in parallel? + - +- What is the precedence of interceptors? + - The precedence of interceptors is as follows: + - Interceptors registered via Smithy default plugins. + - *(AWS Services only)* Interceptors registered via AWS default plugins. + - Interceptors registered via service-customization plugins. + - Interceptors registered via client-level plugins. + - Interceptors registered via client-level configuration. + - Interceptors registered via operation-level plugins. + - Interceptors registered via operation-level configuration. + +## Changes checklist + +TODO From 425b34e3706fde67dd093b6235db1eec22c1c473 Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Wed, 8 Mar 2023 15:10:06 -0600 Subject: [PATCH 2/9] update: incorporate runtime plugin ideas --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 515 ++++++++---------- 1 file changed, 236 insertions(+), 279 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index 9296b66839..f548614121 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -2,19 +2,27 @@ > status: RFC > applies-to: The smithy client +--- +status: RFC +applies-to: "The smithy client" +--- This RFC proposes a new process for constructing client requests and handling service responses. This new process is intended to: - Improve the user experience by - - Simplifying several aspects of sending a request - - Adding more extension points to the request/response lifecycle +- Simplifying several aspects of sending a request +- Adding more extension points to the request/response lifecycle - Improve the maintainer experience by - - Making our SDK more similar in structure to other AWS SDKs - - Simplifying many aspects of the request/response lifecycle - - Making room for future changes +- Making our SDK more similar in structure to other AWS SDKs +- Simplifying many aspects of the request/response lifecycle +- Making room for future changes -Additionally, functionality that the SDKs currently provide like retries, logging, and auth with be incorporated into this new -process in such a way as to make it more configurable and understandable. +Additionally, functionality that the SDKs currently provide like retries, logging, and auth with be incorporated into this new process in such a way as to make it more configurable and understandable. + +This RFC references but is not the source of truth on: + +- Interceptors: To be described in depth in a future RFC. +- Runtime Plugins: To be described in depth in a future RFC. ## Terminology @@ -28,98 +36,40 @@ process in such a way as to make it more configurable and understandable. - **The request/response lifecycle**: The process by which an *SDK client* makes requests and receives responses from a *remote service*. This process is enacted and managed by the *orchestrator*. - **Orchestrator**: The code within an *SDK client* that handles the process of making requests and receiving responses from *remote services*. The orchestrator is configurable by modifying the *runtime plugins* it's built from. The orchestrator is responsible for calling *interceptors* at the appropriate times in the *request/response lifecycle*. - **Interceptor**/**Hook**: A generic extension point within the *orchestrator*. Supports "anything that someone should be able to do", NOT "anything anyone might want to do". These hooks are: - - Either **read-only** or **read/write**. - - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. -- **Runtime Plugin**: A runtime plugin defines instructions for how an *SDK client* is configured to use the components below: - - `RetryStrategy`: Defines how requests are retried. - - `TraceProbes`: Defines locations to which SDK metrics are published. - - `EndpointProviders`: Defines which hostname an SDK will call when making a request. - - `HTTPClients`: Defines how remote services are called. - - `IdentityProviders`: Defines how customers identify themselves to remote services. - - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Defines how customers authenticate themselves to remote services. - - `Checksum Algorithms`: Defines how an SDK calculates request and response checksums. + - Either **read-only** or **read/write**. + - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. +- **Runtime Plugin**: Runtime plugins are similar to interceptors, but they act on configuration instead of requests and response. Both users and services may define runtime plugins. Smithy also defines several default runtime plugins used by most clients. See the F.A.Q. for a list of plugins with descriptions. ## The user experience if this RFC is implemented -For many users, the changes described by this RFC will be invisible. Making a request with an orchestrator-based SDK client looks very similar to the way requests are made pre-RFC: +For many users, the changes described by this RFC will be invisible. Making a request with an orchestrator-based SDK client looks very similar to the way requests were made pre-RFC: ```rust let sdk_config = aws_config::load_from_env().await; let client = aws_sdk_s3::Client::new(&sdk_config); let res = client.get_object() - .bucket("a-bucket") - .key("a-file.txt") - .send() - .await?; + .bucket("a-bucket") + .key("a-file.txt") + .send() + .await?; match res { -Ok(res) => println!("success: {:?}"), -Err(err) => eprintln!("failure: {:?}") + Ok(res) => println!("success: {:?}"), + Err(err) => eprintln!("failure: {:?}") }; ``` -However, if a user wishes to configure clients and operations, they may do so by setting **runtime plugins**. +Users may further configure clients and operations with **runtime plugins**, and they can modify requests and responses with **interceptors**. We'll examine each of these concepts in the following sections. -### Configuring clients and operations with runtime plugins - -Runtime plugins construct and modify client configuration. Plugin initialization is the first step of sending a request, and plugins set in later steps can override the actions of earlier plugins. Plugin ordering is deterministic and non-customizable. +### Service clients and operations are configured with runtime plugins -While AWS services define a default set of plugins, users may define their own plugins, and set them by calling the appropriate methods on a service's config, client, or operation: +> The exact implementation of **runtime plugins** is left for another RFC. That other RFC will be linked here once it's written. To get an idea of what they may look like, see the *"Layered configuration, stored in type maps"* section of this RFC. -```rust -let service_config = aws_sdk_s3::Config::builder() - // Multiple interceptors may be added - .with_interceptor(CustomInterceptor::new()) - // Multiple trace probes may be added - .with_trace_probe(CustomTraceProbe::new()) - // Multiple identity providers may be added - .with_identity_provider(CustomIdProvider::new()) - // Multiple checksum algorithms may be added, but if a checksum algorithm's - // ID matches the ID of an existing algorithm, then the new algorithm will - // overwrite the old one. - .with_checksum_algorithm(CustomChecksumAlgorithm::new()) - // Only one retry strategy may be set. Setting a new one will replace the - // old one. - .with_retry_strategy(CustomRetryStrategy::new()) - // Services also define protocol-specific configuration. As an example, here - // are some HTTP client setting. - .with_http_client(CustomHttpClient::new()) - // - .with_http_auth_scheme(CustomHttpAuthScheme::new()) - .build(); - -// TODO HTTP client configuration is called out as separate in the internal design docs. How should it be configured? - -// Plugins can be set on clients. -let client = aws_sdk_s3::Client::builder() - .config(&sdk_config) - .with_plugin(OpenTelemetryPlugin::new()) -.build(); - -// Plugins can be set on operations by using the `customize` method. -let res = client.get_object() - .bucket("a-bucket") - .key("a-file.txt") - .customize(|mut op| { - // Check to see if the `SpecialOperationLogger` was already set. - // If no, then set it and add a special header before returning - // the modified operation. - if !op.plugins().contains(SpecialOperationLogger::id()) { - // set_plugin is just like `with_plugin` except it takes a - // `&mut self` so that we don't need to re-assign `op`. - op.set_plugin(SpecialOperationLogger::new()); - op.headers_mut().insert("Is-Special", "yes"); - } - - op - }) - .send() - .await?; -``` +Runtime plugins construct and modify client configuration. Plugin initialization is the first step of sending a request, and plugins set in later steps can override the actions of earlier plugins. Plugin ordering is deterministic and non-customizable. -Plugins are specifically meant for constructing service and operation configuration. If a user wants to define behavior that should occur at specific points in the *request/response lifecycle*, then they should instead consider defining an *interceptor*. +While AWS services define a default set of plugins, users may define their own plugins, and set them by calling the appropriate methods on a service's config, client, or operation. Plugins are specifically meant for constructing service and operation configuration. If a user wants to define behavior that should occur at specific points in the *request/response lifecycle*, then they should instead consider defining an *interceptor*. -### Configuring interceptors +### Requests and responses are modified by interceptors Interceptors are similar to middlewares, in that they are functions that can read and modify request and response state. However, they are more restrictive than middlewares in that they can't modify the "control flow" of the request/response lifecycle. This is intentional. Interceptors can be registered on a client or operation, and the orchestrator is responsible for calling interceptors at the appropriate time. Users MUST NOT perform blocking IO within an interceptor. Interceptors are sync, and are not intended to perform large amounts of work. This makes them easier to reason about and use. Depending on when they are called, interceptors may read and modify *input messages*, *transport request messages*, *transport response messages*, and *output messages*. Additionally, all interceptors may write to a context object that is shared between all interceptors. @@ -134,19 +84,19 @@ input message. marshaling the input message into a transport message. 1. **Read After Serialization *(Read-Only)***: The first thing the SDK calls after marshaling the input message into a transport message. 1. *(Retry Loop)* - 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. - 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. - 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. - 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. - 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. - 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. - 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. - 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. - 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. - 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. - 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. - 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). - 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. + 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. + 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. + 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. + 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. + 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. + 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. + 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. + 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. + 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. + 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). + 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. 1. **Modify Before Execution Completion *(Read/Write)***: Before the execution ends. Allows modifying the unmarshaled response (output message or error). 1. **Read After Execution *(Read-Only)***: After everything has happened. This is the last thing the SDK calls during operation execution. @@ -173,88 +123,129 @@ The optional request and response types in the interceptor context can only be a ## How to implement this RFC -### The `aws-smithy-orchestrator` crate +### Layered configuration, stored in type maps -*I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* +> **Type map**: A data structure where stored values are keyed by their type. Hence, only one value can be stored for a given type. +> +> *See [typemap](https://docs.rs/typemap), [type-map](https://docs.rs/crate/type-map), [http::Extensions](https://docs.rs/http/latest/http/struct.Extensions.html), and [actix_http::Extensions](https://docs.rs/actix-http/latest/actix_http/struct.Extensions.html) for examples.* ```rust -pub struct SmithyConfig { - pub interceptors: Interceptors< - In, - http::Request, - http::Response, - Result - >, - pub response_deserializer: DeserializeResponse, - pub token_bucket: Arc, - pub request_serializer: SerializeRequest, - pub endpoint_resolver: Arc>, - // These still need to be generic either because they're service-specific - // or not object safe. - pub service_config: Cfg, - pub retry_classifier: ClassifyRetry, SdkError>, - pub endpoint_parameters: Ep, -} + let conf: ConfigBag = aws_config::from_env() + // Configuration can be common to all smithy clients + .with(RetryConfig::builder().disable_retries().build()) + // Or, protocol-specific + .with(HttpClient::builder().with_().build()) + // Or, AWS-specific + .with(Region::from("us-east-1")) + // Or, service-specific + .with(S3Config::builder().force_path_style(false).build()) + .await; + +let client = aws_sdk_s3::Client::new(&conf); + +client.list_buckets() + .customize() + // Configuration can be set on operations as well as clients + .with(HttpConfig::builder().conn(some_other_conn).build()) + .send() + .await; +``` -// These two macros help shorten the `make_an_attempt` function and make it -// more readable -macro_rules! bail_unless_ok { - ($ctx: ident, $inp: expr) => { - match $inp { - Ok(ok) => ok, - Err(err) => { - $ctx.set_modeled_response(Err(err)); - return; - } - } - }; +Setting configuration that will not be used wastes memory and can make debugging more difficult. Therefore, configuration defaults are only set when they're relevant. For example, if a smithy service doesn't support HTTP, then no HTTP client will be set. + +#### What is "layered" configuration? + +Configuration has precedence. Configuration set on an operation will override configuration set on a client, and configuration set on a client will override default configuration. However, configuration with a higher precedence can also augment configuration with a lower precedence. For example: + +```rust +let conf: ConfigBag = aws_config::from_env() + .with(SomeConfig::builder() + .option_a(1) + .option_b(2) + .option_c(3) + .build() + ) + .await; + +let client = aws_sdk_s3::Client::new(&conf); + +client.list_buckets() + .customize() + .with(SomeConfig::builder() + .option_a(0) + .option_b(Value::Inherit) + .option_c(Value::Unset) + .build() + ) + .send() + .await; +``` + +In the above example, when the `option_a`, `option_b`, `option_c`, values of `SomeConfig` are accessed, they'll return: + +- `option_a`: `0` +- `option_b`: `2` +- `option_c`: No value + +Config values are wrapped in a special enum called `Value` with three variants: + +- `Value::Set`: A set value that will override values from lower layers. +- `Value::Unset`: An explicitly unset value that will override values from lower layers. +- `Value::Inherit`: An explicitly unset value that will inherit a value from a lower layer. + +Builders are defined like this: + +```rust +struct SomeBuilder { + value: Value, } -macro_rules! bail_if_err { - ($ctx: ident, $inp: expr) => { - if let Err(err) = $inp { - $ctx.set_modeled_response(Err(err)); - return; - } - }; +impl struct SomeBuilder { + fn new() -> Self { + // By default, config values inherit from lower-layer configs + Self { value: Value::Inherit } + } + + fn some_field(&mut self, value: impl Into>) -> &mut self { + self.value = value.into(); + self + } } +``` + +Because of `impl Into>`, users don't need to reference the `Value` enum unless they want to "unset" a value. +### The `aws-smithy-orchestrator` crate + +*I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* + +```rust /// `In`: The input message e.g. `ListObjectsRequest` -/// `T`: The 'success' output message e.g. `ListObjectsResponse` -/// `E`: The 'failure' output message e.g. `NoSuchBucketException` -/// `Cfg`: The service config -/// `Ep`: An endpoint parameters struct -/// `To`: A token from a token bucket -pub async fn invoke( - input: In, - cfg: &Cfg, - mut connection: Connector, -) -> Result, SdkError> +/// `Req`: The transport request message e.g. `http::Request` +/// `Res`: The transport response message e.g. `http::Response` +/// `Out`: The output message. A `Result` containing either: +/// - The 'success' output message e.g. `ListObjectsResponse` +/// - The 'failure' output message e.g. `NoSuchBucketException` +pub async fn invoke(input: In, cfg: &mut ConfigBag) -> Out where - // The input must be Clone in case of retries + // The input must be Clone in case of retries In: Clone, - // Because the invoke function is not service-specific, and must be able to - // consume service-specific configuration, it needs to convert that config - // into a more generic form. We must also clone it before applying runtime - // plugins. - Cfg: Clone + ApplyPlugins + Into, - // Errors that occur during the request/response lifecycle must be classifiable - // by the request retryer and the token bucket. - E: ProvideErrorKind, { - // Create a new interceptor context. - // This will be passed to each interceptor in turn. + // Create a new interceptor context. + // This will be passed to each interceptor in turn. let mut ctx = InterceptorContext::new(req); - // We clone the config and apply all registered plugins to it, before converting - // it into a type we can use. - let cfg = cfg.clone().apply_plugins().into(); + let interceptors: &mut Interceptors, http::Response, Out> = + cfg.get()?; + // We clone the config and apply all registered plugins to it, before converting + // it into a type we can use. + let cfg = cfg.clone().apply_plugins().into(); interceptors.read_before_execution(&ctx)?; interceptors.modify_before_serialization(&mut ctx)?; interceptors.read_before_serialization(&ctx)?; - // We clone the input, serialize it into ""-form, - // and store it in the interceptor context. + // We clone the input, serialize it into ""-form, + // and store it in the interceptor context. let mod_req = ctx.modeled_request().clone(); let req = request_serializer(mod_req)?; ctx.set_tx_request(req); @@ -262,94 +253,21 @@ where interceptors.read_after_serialization(&ctx)?; interceptors.modify_before_retry_loop(&mut ctx)?; - // Making an HTTP request can fail for several reasons, but we still need to - // call lifecycle events when that happens. Therefore, we define this - // `make_an_attempt` function to make error handling simpler. - async fn make_an_attempt( - ctx: &mut InterceptorContext< - In, - http::Request, - http::Response, - Result, SdkError> - >, - interceptors: &mut Interceptors< - In, - http::Request, - http::Response, - Result - >, - endpoint_resolver: &dyn ResolveEndpoint, - endpoint_parameters: &Ep, - connection: &mut Connector, - response_deserializer: &DeserializeResponse, - ) where - In: Clone, - { - interceptors.read_before_attempt(ctx); - let auth_schemes = bail_unless_ok!( - ctx, - resolve_auth_schemes(endpoint_resolver, endpoint_parameters) - ); - let signer = get_signer_for_first_supported_auth_scheme(&auth_schemes); - let identity = bail_unless_ok!( - ctx, - auth_scheme.resolve_identity() - ); - bail_if_err!( - ctx, - resolve_and_apply_endpoint(ctx, endpoint_resolver, &endpoint_parameters) - ); - - bail_if_err!(ctx, interceptors.modify_before_signing(ctx) ); - // 7.h - bail_if_err!(ctx, interceptors.read_before_signing(ctx)); - { - let (tx_req_mut, props) = ctx.tx_request_mut() - .expect("tx_request has been set"); - if let Err(err) = signer(tx_req_mut, &props) { - drop(props); - ctx.set_modeled_response(Err(err.into())); - return; - } - } - bail_if_err!(ctx, interceptors.read_after_signing(ctx)); - bail_if_err!(ctx, interceptors.modify_before_transmit(ctx)); - bail_if_err!(ctx, interceptors.read_before_transmit(ctx)); - - // The connection consumes the request but we need to keep a copy of it - // within the interceptor context, so we clone it here. - let tx_req = try_clone_http_request( - ctx.tx_request().expect("tx_request has been set") - ).expect("tx_request is cloneable"); - let res = bail_unless_ok!(ctx, connection(tx_req).await); - ctx.set_tx_response(res); - - bail_if_err!(ctx, interceptors.read_after_transmit(ctx)); - bail_if_err!(ctx, interceptors.modify_before_deserialization(ctx)); - bail_if_err!(ctx, interceptors.read_before_deserialization(ctx)); - - let (tx_res, _) = ctx.tx_response_mut().expect("tx_response has been set"); - let res = response_deserializer(tx_res); - ctx.set_modeled_response(res); - - bail_if_err!(ctx, interceptors.read_after_deserialization(&ctx)); - } - let mut attempt = 0; loop { if attempt > config.retry.max_attempts() { break; } - // Acquire initial request token. Some retry/quota strategies don't require a - // token for the initial request. We must always ask for the token, - // but taking it may not affect the number of tokens in the bucket. - let mut token = Some(token_bucket.try_acquire(None).map_err(|err| { - SdkError::dispatch_failure(ConnectorError::other( - Box::new(err), - Some(ErrorKind::ClientError), - )) - })?); + // Acquire initial request token. Some retry/quota strategies don't require a + // token for the initial request. We must always ask for the token, + // but taking it may not affect the number of tokens in the bucket. + let mut token = Some(token_bucket.try_acquire(None).map_err(|err| { + SdkError::dispatch_failure(ConnectorError::other( + Box::new(err), + Some(ErrorKind::ClientError), + )) + })?); // For attempt other than the first, we need to clear out data set in previous // attempts. @@ -357,23 +275,17 @@ where ctx.reset(); } - make_an_attempt( - &mut ctx, - &mut interceptors, - &signer, - endpoint_resolver.as_ref(), - &endpoint_parameters, - &mut connection, - &response_deserializer, - ) - .await; + let res = make_an_attempt(&mut cfg, &mut ctx).await; + ctx.set_modeled_response(res); interceptors.read_after_attempt(&ctx)?; interceptors.modify_before_attempt_completion(&mut ctx)?; - // Figure out if the last attempt succeeded or failed + // Figure out if the last attempt succeeded or failed let retry_kind: RetryKind = retry_classifier( - ctx.modeled_response().expect("modeled_response has been set").as_ref(), + ctx.modeled_response() + .expect("modeled_response has been set") + .as_ref(), ); match retry_kind { RetryKind::Explicit(_duration) => { @@ -394,7 +306,7 @@ where if attempt == 0 { // Some token buckets refill if a request succeeds with retrying token_bucket.refill_on_instant_success(); - } + } } _unhandled => unreachable!("encountered unhandled RetryKind {_unhandled:?}"), } @@ -407,46 +319,91 @@ where break; } - let ( - modeled_response, - tx_response, - property_bag - ) = ctx.into_responses().map_err(SdkError::interceptor_error)?; - let operation_response = operation::Response::from_parts( - tx_response, - property_bag - ); + let (modeled_response, tx_response, property_bag) = + ctx.into_responses().map_err(SdkError::interceptor_error)?; + let operation_response = operation::Response::from_parts(tx_response, property_bag); match modeled_response { Ok(output) => Ok(SdkSuccess { parsed: output, raw: operation_response, }), - Err(err) => Err(SdkError::service_error( - err, - operation_response, - )) + Err(err) => Err(SdkError::service_error(err, operation_response)), } } -``` - -### The `aws-smithy-interceptors` crate -The `aws-smithy-interceptors` crate is a stub in this RFC that contains only a few types necessary to partially implement the orchestrator. The specific design of interceptors is left to a future RFC. +// Making an HTTP request can fail for several reasons, but we still need to +// call lifecycle events when that happens. Therefore, we define this +// `make_an_attempt` function to make error handling simpler. +async fn make_an_attempt( + cfg: &mut ConfigBag, + ctx: &mut InterceptorContext, http::Response, Out>, +) where + In: Clone, +{ + let interceptors: &mut Interceptors, http::Response, Out> = + cfg.get()?; + + interceptors.read_before_attempt(ctx); + let auth_schemes = resolve_auth_schemes(ctx, cfg)?; + let signer = get_signer_for_first_supported_auth_scheme(&auth_schemes)?; + let identity = auth_scheme.resolve_identity(cfg)?; + resolve_and_apply_endpoint(ctx, cfg)?; + + interceptors.modify_before_signing(ctx)?; + interceptors.read_before_signing(ctx)?; + let (tx_req_mut, props) = ctx.tx_request_mut().expect("tx_request has been set"); + signer(tx_req_mut, &props)?; + interceptors.read_after_signing(ctx)?; + interceptors.modify_before_transmit(ctx)?; + interceptors.read_before_transmit(ctx)?; + + // The connection consumes the request but we need to keep a copy of it + // within the interceptor context, so we clone it here. + let tx_req = try_clone_http_request(ctx.tx_request().expect("tx_request has been set")) + .expect("tx_request is cloneable"); + let connection = cfg + .get:: Box>>>() + .ok_or_else(|| ConnectorError)?; + let res = connection(tx_req).await?; + ctx.set_tx_response(res); + + interceptors.read_after_transmit(ctx)?; + interceptors.modify_before_deserialization(ctx)?; + interceptors.read_before_deserialization(ctx)?; + + let (tx_res, _) = ctx.tx_response_mut().expect("tx_response has been set"); + let response_deserializer = cfg + .get:: Box>>>() + .ok_or_else(|| CfgError)?; + let res = response_deserializer(tx_res).await?; + ctx.set_modeled_response(res); + + interceptors.read_after_deserialization(&ctx) +} +``` ## F.A.Q. - The orchestrator is a large and complex feature, with many moving parts. How can we ensure that multiple people can contribute in parallel? - - + - - What is the precedence of interceptors? - - The precedence of interceptors is as follows: - - Interceptors registered via Smithy default plugins. - - *(AWS Services only)* Interceptors registered via AWS default plugins. - - Interceptors registered via service-customization plugins. - - Interceptors registered via client-level plugins. - - Interceptors registered via client-level configuration. - - Interceptors registered via operation-level plugins. - - Interceptors registered via operation-level configuration. + - The precedence of interceptors is as follows: + - Interceptors registered via Smithy default plugins. + - *(AWS Services only)* Interceptors registered via AWS default plugins. + - Interceptors registered via service-customization plugins. + - Interceptors registered via client-level plugins. + - Interceptors registered via client-level configuration. + - Interceptors registered via operation-level plugins. + - Interceptors registered via operation-level configuration. +- What runtime plugins will be defined in `smithy-rs`? + - `RetryStrategy`: Configures how requests are retried. + - `TraceProbes`: Configures locations to which SDK metrics are published. + - `EndpointProviders`: Configures which hostname an SDK will call when making a request. + - `HTTPClients`: Configures how remote services are called. + - `IdentityProviders`: Configures how customers identify themselves to remote services. + - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Configures how customers authenticate themselves to remote services. + - `Checksum Algorithms`: Configures how an SDK calculates request and response checksums. ## Changes checklist From 383de1a554c9df21c2ec820cbdfa793547a9652e Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Tue, 14 Mar 2023 10:41:50 -0500 Subject: [PATCH 3/9] update: RFC to reflect current impl of orchestrator add: section on encapsulation --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 292 +++++++++++++----- 1 file changed, 211 insertions(+), 81 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index f548614121..891f03426d 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -2,10 +2,6 @@ > status: RFC > applies-to: The smithy client ---- -status: RFC -applies-to: "The smithy client" ---- This RFC proposes a new process for constructing client requests and handling service responses. This new process is intended to: @@ -24,6 +20,16 @@ This RFC references but is not the source of truth on: - Interceptors: To be described in depth in a future RFC. - Runtime Plugins: To be described in depth in a future RFC. +## TLDR; + +When a smithy client communicates with a smithy service, messages are handled by an "orchestrator." The orchestrator runs in two main phases: +1. Constructing configuration. + - This process is user-configurable with "runtime plugins." + - Configuration is stored in a typemap. +1. Transforming a client request into a server response. + - This process is user-configurable with "interceptors." + - Interceptors are functions that are run by "hooks" in the request/response lifecycle. + ## Terminology - **SDK Client**: A high-level abstraction allowing users to make requests to remote services. @@ -36,9 +42,10 @@ This RFC references but is not the source of truth on: - **The request/response lifecycle**: The process by which an *SDK client* makes requests and receives responses from a *remote service*. This process is enacted and managed by the *orchestrator*. - **Orchestrator**: The code within an *SDK client* that handles the process of making requests and receiving responses from *remote services*. The orchestrator is configurable by modifying the *runtime plugins* it's built from. The orchestrator is responsible for calling *interceptors* at the appropriate times in the *request/response lifecycle*. - **Interceptor**/**Hook**: A generic extension point within the *orchestrator*. Supports "anything that someone should be able to do", NOT "anything anyone might want to do". These hooks are: - - Either **read-only** or **read/write**. - - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. + - Either **read-only** or **read/write**. + - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. - **Runtime Plugin**: Runtime plugins are similar to interceptors, but they act on configuration instead of requests and response. Both users and services may define runtime plugins. Smithy also defines several default runtime plugins used by most clients. See the F.A.Q. for a list of plugins with descriptions. +- **ConfigBag**: A `typemap` that's equivalent to [`http::Extensions`](https://docs.rs/http/latest/http/struct.Extensions.html). Used to store configuration for the orchestrator. ## The user experience if this RFC is implemented @@ -48,14 +55,14 @@ For many users, the changes described by this RFC will be invisible. Making a re let sdk_config = aws_config::load_from_env().await; let client = aws_sdk_s3::Client::new(&sdk_config); let res = client.get_object() - .bucket("a-bucket") - .key("a-file.txt") - .send() - .await?; + .bucket("a-bucket") + .key("a-file.txt") + .send() + .await?; match res { - Ok(res) => println!("success: {:?}"), - Err(err) => eprintln!("failure: {:?}") + Ok(res) => println!("success: {:?}"), + Err(err) => eprintln!("failure: {:?}") }; ``` @@ -84,19 +91,19 @@ input message. marshaling the input message into a transport message. 1. **Read After Serialization *(Read-Only)***: The first thing the SDK calls after marshaling the input message into a transport message. 1. *(Retry Loop)* - 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. - 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. - 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. - 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. - 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. - 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. - 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. - 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. - 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. - 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. - 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. - 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). - 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. + 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. + 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. + 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. + 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. + 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. + 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. + 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. + 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. + 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. + 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). + 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. 1. **Modify Before Execution Completion *(Read/Write)***: Before the execution ends. Allows modifying the unmarshaled response (output message or error). 1. **Read After Execution *(Read-Only)***: After everything has happened. This is the last thing the SDK calls during operation execution. @@ -123,6 +130,69 @@ The optional request and response types in the interceptor context can only be a ## How to implement this RFC +### Integrating with the orchestrator + +Imagine we have some sort of request signer. This signer doesn't refer to any orchestrator types. All it needs is a `HeaderMap` along with two strings, and will return a signature in string form. + +```rust +struct Signer; + +impl Signer { + fn sign(headers: &http::HeaderMap, signing_name: &str, signing_region: &str) -> String { + todo!() + } +} +``` + +Now imagine things from the orchestrator's point of view. It requires something that implements an `AuthOrchestrator` which will be responsible for resolving the correct auth +scheme, identity, and signer for an operation, as well as signing the request + +```rust +pub trait AuthOrchestrator: Send + Sync + Debug { + fn auth_request(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; +} + +// And it calls that `AuthOrchestrator` like so: +fn invoke() { + // code omitted for brevity + + // Get the request to be signed + let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); + // Fetch the auth orchestrator from the bag + let auth_orchestrator = cfg .get::>>() .ok_or("missing auth orchestrator")?; + // Auth the request + auth_orchestrator.auth_request(tx_req_mut, cfg)?; + + // code omitted for brevity +} +``` + +The specific implementation of the `AuthOrchestrator` is what brings these two things together: + +```rust +struct Sigv4AuthOrchestrator; + +impl AuthOrchestrator for Sigv4AuthOrchestrator { + fn auth_request(&self, req: &mut http::Request, cfg: &ConfigBag) -> Result<(), BoxError> { + let signer = Signer; + let signing_name = cfg.get::().ok_or(Error::MissingSigningName)?; + let signing_region = cfg.get::().ok_or(Error::MissingSigningRegion)?; + let headers = req.headers_mut(); + + let signature = signer.sign(headers, signing_name, signing_region); + match cfg.get::() { + Some(Query) => req.query.set("sig", signature), + Some(Header) => req.headers_mut().insert("sig", signature), + None => return Err(Error::MissingSignatureLocation), + }; + + Ok(()) + } +} +``` + +This intermediate code should be free from as much logic as possible. Whenever possible, we must maintain this encapsulation. Doing so will make the Orchestrator more flexible, maintainable, and understandable. + ### Layered configuration, stored in type maps > **Type map**: A data structure where stored values are keyed by their type. Hence, only one value can be stored for a given type. @@ -131,24 +201,24 @@ The optional request and response types in the interceptor context can only be a ```rust let conf: ConfigBag = aws_config::from_env() - // Configuration can be common to all smithy clients - .with(RetryConfig::builder().disable_retries().build()) - // Or, protocol-specific - .with(HttpClient::builder().with_().build()) - // Or, AWS-specific - .with(Region::from("us-east-1")) - // Or, service-specific - .with(S3Config::builder().force_path_style(false).build()) - .await; + // Configuration can be common to all smithy clients + .with(RetryConfig::builder().disable_retries().build()) + // Or, protocol-specific + .with(HttpClient::builder().with_().build()) + // Or, AWS-specific + .with(Region::from("us-east-1")) + // Or, service-specific + .with(S3Config::builder().force_path_style(false).build()) + .await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() - .customize() - // Configuration can be set on operations as well as clients - .with(HttpConfig::builder().conn(some_other_conn).build()) - .send() - .await; + .customize() + // Configuration can be set on operations as well as clients + .with(HttpConfig::builder().conn(some_other_conn).build()) + .send() + .await; ``` Setting configuration that will not be used wastes memory and can make debugging more difficult. Therefore, configuration defaults are only set when they're relevant. For example, if a smithy service doesn't support HTTP, then no HTTP client will be set. @@ -159,26 +229,26 @@ Configuration has precedence. Configuration set on an operation will override co ```rust let conf: ConfigBag = aws_config::from_env() - .with(SomeConfig::builder() - .option_a(1) - .option_b(2) - .option_c(3) - .build() - ) - .await; + .with(SomeConfig::builder() + .option_a(1) + .option_b(2) + .option_c(3) + .build() + ) + .await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() - .customize() - .with(SomeConfig::builder() - .option_a(0) - .option_b(Value::Inherit) - .option_c(Value::Unset) - .build() - ) - .send() - .await; + .customize() + .with(SomeConfig::builder() + .option_a(0) + .option_b(Value::Inherit) + .option_c(Value::Unset) + .build() + ) + .send() + .await; ``` In the above example, when the `option_a`, `option_b`, `option_c`, values of `SomeConfig` are accessed, they'll return: @@ -197,24 +267,47 @@ Builders are defined like this: ```rust struct SomeBuilder { - value: Value, + value: Value, } impl struct SomeBuilder { - fn new() -> Self { - // By default, config values inherit from lower-layer configs - Self { value: Value::Inherit } - } - - fn some_field(&mut self, value: impl Into>) -> &mut self { - self.value = value.into(); - self - } + fn new() -> Self { + // By default, config values inherit from lower-layer configs + Self { value: Value::Inherit } + } + + fn some_field(&mut self, value: impl Into>) -> &mut self { + self.value = value.into(); + self + } } ``` Because of `impl Into>`, users don't need to reference the `Value` enum unless they want to "unset" a value. +#### Layer separation and precedence + +Codegen defines default sets of interceptors and runtime plugins at various "levels": + +1. AWS-wide defaults set by codegen. +1. Service-wide defaults set by codegen. +1. Operation-specific defaults set by codegen. + +Likewise, users may mount their own interceptors and runtime plugins: + +1. The AWS config level, e.g. `aws_types::Config`. +1. The service config level, e.g. `aws_sdk_s3::Config`. +1. The operation config level, e.g. `aws_sdk_s3::Client::get_object`. + +Configuration is resolved in a fixed manner by reading the "lowest level" of config available, falling back to "higher levels" only when no value has been set. Therefore, at least 3 separate `ConfigBag`s are necessary, and user configuration has precedence over codegen-defined default configuration. With that in mind, resolution of configuration would look like this: + +1. Check user-set operation config. +1. Check codegen-defined operation config. +1. Check user-set service config. +1. Check codegen-defined service config. +1. Check user-set AWS config. +1. Check codegen-defined AWS config. + ### The `aws-smithy-orchestrator` crate *I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* @@ -383,27 +476,64 @@ async fn make_an_attempt( } ``` +#### Traits + +At various points in the execution of `invoke`, trait objects are fetched from the `ConfigBag`. These are preliminary definitions of those traits: + +```rust +pub type BoxError = Box; +pub type BoxFallibleFut = Pin>>>; + +pub trait TraceProbe: Send + Sync + Debug { + fn dispatch_events(&self, cfg: &ConfigBag) -> BoxFallibleFut<()>; +} + +pub trait RequestSerializer: Send + Sync + Debug { + fn serialize_request(&self, req: &mut In, cfg: &ConfigBag) -> Result; +} + +pub trait ResponseDeserializer: Send + Sync + Debug { + fn deserialize_response(&self, res: &mut TxRes, cfg: &ConfigBag) -> Result; +} + +pub trait Connection: Send + Sync + Debug { + fn call(&self, req: &mut TxReq, cfg: &ConfigBag) -> BoxFallibleFut; +} + +pub trait RetryStrategy: Send + Sync + Debug { + fn should_retry(&self, res: &Out, cfg: &ConfigBag) -> Result; +} + +pub trait AuthOrchestrator: Send + Sync + Debug { + fn auth_request(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; +} + +pub trait EndpointOrchestrator: Send + Sync { + fn resolve_and_apply_endpoint(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; +} +``` + ## F.A.Q. - The orchestrator is a large and complex feature, with many moving parts. How can we ensure that multiple people can contribute in parallel? - - + - - What is the precedence of interceptors? - - The precedence of interceptors is as follows: - - Interceptors registered via Smithy default plugins. - - *(AWS Services only)* Interceptors registered via AWS default plugins. - - Interceptors registered via service-customization plugins. - - Interceptors registered via client-level plugins. - - Interceptors registered via client-level configuration. - - Interceptors registered via operation-level plugins. - - Interceptors registered via operation-level configuration. + - The precedence of interceptors is as follows: + - Interceptors registered via Smithy default plugins. + - *(AWS Services only)* Interceptors registered via AWS default plugins. + - Interceptors registered via service-customization plugins. + - Interceptors registered via client-level plugins. + - Interceptors registered via client-level configuration. + - Interceptors registered via operation-level plugins. + - Interceptors registered via operation-level configuration. - What runtime plugins will be defined in `smithy-rs`? - - `RetryStrategy`: Configures how requests are retried. - - `TraceProbes`: Configures locations to which SDK metrics are published. - - `EndpointProviders`: Configures which hostname an SDK will call when making a request. - - `HTTPClients`: Configures how remote services are called. - - `IdentityProviders`: Configures how customers identify themselves to remote services. - - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Configures how customers authenticate themselves to remote services. - - `Checksum Algorithms`: Configures how an SDK calculates request and response checksums. + - `RetryStrategy`: Configures how requests are retried. + - `TraceProbes`: Configures locations to which SDK metrics are published. + - `EndpointProviders`: Configures which hostname an SDK will call when making a request. + - `HTTPClients`: Configures how remote services are called. + - `IdentityProviders`: Configures how customers identify themselves to remote services. + - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Configures how customers authenticate themselves to remote services. + - `Checksum Algorithms`: Configures how an SDK calculates request and response checksums. ## Changes checklist From e8ac038e68499ff008c12075f40d006c39142aeb Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Wed, 15 Mar 2023 13:12:53 -0500 Subject: [PATCH 4/9] update: RFC code examples --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 226 ++++++++---------- 1 file changed, 97 insertions(+), 129 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index 891f03426d..877f411ebb 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -181,8 +181,8 @@ impl AuthOrchestrator for Sigv4AuthOrchestrator { let signature = signer.sign(headers, signing_name, signing_region); match cfg.get::() { - Some(Query) => req.query.set("sig", signature), - Some(Header) => req.headers_mut().insert("sig", signature), + Some(SignatureLocation::Query) => req.query.set("sig", signature), + Some(SignatureLocation::Header) => req.headers_mut().insert("sig", signature), None => return Err(Error::MissingSignatureLocation), }; @@ -310,7 +310,7 @@ Configuration is resolved in a fixed manner by reading the "lowest level" of con ### The `aws-smithy-orchestrator` crate -*I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* +*The error handling in this example is simplified to use `BoxError`. When fully implemented, this will likely use `SdkError` instead.* ```rust /// `In`: The input message e.g. `ListObjectsRequest` @@ -319,160 +319,128 @@ Configuration is resolved in a fixed manner by reading the "lowest level" of con /// `Out`: The output message. A `Result` containing either: /// - The 'success' output message e.g. `ListObjectsResponse` /// - The 'failure' output message e.g. `NoSuchBucketException` -pub async fn invoke(input: In, cfg: &mut ConfigBag) -> Out +pub async fn invoke( + input: In, + interceptors: &mut Interceptors>, + runtime_plugins: &RuntimePlugins, + cfg: &mut ConfigBag, +) -> Result where // The input must be Clone in case of retries - In: Clone, + In: Clone + 'static, + Req: 'static, + Res: 'static, + T: 'static, { - // Create a new interceptor context. - // This will be passed to each interceptor in turn. - let mut ctx = InterceptorContext::new(req); - let interceptors: &mut Interceptors, http::Response, Out> = - cfg.get()?; - // We clone the config and apply all registered plugins to it, before converting - // it into a type we can use. - let cfg = cfg.clone().apply_plugins().into(); - - interceptors.read_before_execution(&ctx)?; - interceptors.modify_before_serialization(&mut ctx)?; - interceptors.read_before_serialization(&ctx)?; - - // We clone the input, serialize it into ""-form, - // and store it in the interceptor context. - let mod_req = ctx.modeled_request().clone(); - let req = request_serializer(mod_req)?; - ctx.set_tx_request(req); + let mut ctx: InterceptorContext> = + InterceptorContext::new(input); - interceptors.read_after_serialization(&ctx)?; - interceptors.modify_before_retry_loop(&mut ctx)?; + runtime_plugins.apply_client_configuration(cfg)?; + interceptors.client_read_before_execution(&ctx, cfg)?; - let mut attempt = 0; - loop { - if attempt > config.retry.max_attempts() { - break; - } + runtime_plugins.apply_operation_configuration(cfg)?; + interceptors.operation_read_before_execution(&ctx, cfg)?; - // Acquire initial request token. Some retry/quota strategies don't require a - // token for the initial request. We must always ask for the token, - // but taking it may not affect the number of tokens in the bucket. - let mut token = Some(token_bucket.try_acquire(None).map_err(|err| { - SdkError::dispatch_failure(ConnectorError::other( - Box::new(err), - Some(ErrorKind::ClientError), - )) - })?); - - // For attempt other than the first, we need to clear out data set in previous - // attempts. - if attempt > 0 { - ctx.reset(); - } + interceptors.read_before_serialization(&ctx, cfg)?; + interceptors.modify_before_serialization(&mut ctx, cfg)?; - let res = make_an_attempt(&mut cfg, &mut ctx).await; - ctx.set_modeled_response(res); - - interceptors.read_after_attempt(&ctx)?; - interceptors.modify_before_attempt_completion(&mut ctx)?; - - // Figure out if the last attempt succeeded or failed - let retry_kind: RetryKind = retry_classifier( - ctx.modeled_response() - .expect("modeled_response has been set") - .as_ref(), - ); - match retry_kind { - RetryKind::Explicit(_duration) => { - attempt += 1; - token.take().unwrap().forget(); - continue; - } - RetryKind::Error(_err) => { - attempt += 1; - token.take().unwrap().forget(); - continue; - } - RetryKind::UnretryableFailure => { - token.take().unwrap().forget(); - } - RetryKind::Unnecessary => { - token.take().unwrap().release(); - if attempt == 0 { - // Some token buckets refill if a request succeeds with retrying - token_bucket.refill_on_instant_success(); - } - } - _unhandled => unreachable!("encountered unhandled RetryKind {_unhandled:?}"), + let request_serializer = cfg + .get::>>() + .ok_or("missing serializer")?; + let req = request_serializer.serialize_request(ctx.modeled_request_mut(), cfg)?; + ctx.set_tx_request(req); + + interceptors.read_after_serialization(&ctx, cfg)?; + interceptors.modify_before_retry_loop(&mut ctx, cfg)?; + + loop { + make_an_attempt(&mut ctx, cfg, interceptors).await?; + interceptors.read_after_attempt(&ctx, cfg)?; + interceptors.modify_before_attempt_completion(&mut ctx, cfg)?; + + let retry_strategy = cfg + .get::>>>() + .ok_or("missing retry strategy")?; + let mod_res = ctx + .modeled_response() + .expect("it's set during 'make_an_attempt'"); + if retry_strategy.should_retry(mod_res, cfg)? { + continue; } - interceptors.modify_before_completion(&mut ctx)?; - // Dispatch logging events to all registered tracing probes - cfg.dispatch_events(); - interceptors.read_after_execution(&ctx)?; + interceptors.modify_before_completion(&mut ctx, cfg)?; + let trace_probe = cfg + .get::>() + .ok_or("missing trace probes")?; + trace_probe.dispatch_events(cfg); + interceptors.read_after_execution(&ctx, cfg)?; break; } - let (modeled_response, tx_response, property_bag) = - ctx.into_responses().map_err(SdkError::interceptor_error)?; - let operation_response = operation::Response::from_parts(tx_response, property_bag); - - match modeled_response { - Ok(output) => Ok(SdkSuccess { - parsed: output, - raw: operation_response, - }), - Err(err) => Err(SdkError::service_error(err, operation_response)), - } + let (modeled_response, _) = ctx.into_responses()?; + modeled_response } // Making an HTTP request can fail for several reasons, but we still need to // call lifecycle events when that happens. Therefore, we define this // `make_an_attempt` function to make error handling simpler. -async fn make_an_attempt( +async fn make_an_attempt( + ctx: &mut InterceptorContext>, cfg: &mut ConfigBag, - ctx: &mut InterceptorContext, http::Response, Out>, -) where - In: Clone, + interceptors: &mut Interceptors>, +) -> Result<(), BoxError> +where + In: Clone + 'static, + Req: 'static, + Res: 'static, + T: 'static, { - let interceptors: &mut Interceptors, http::Response, Out> = - cfg.get()?; - - interceptors.read_before_attempt(ctx); - let auth_schemes = resolve_auth_schemes(ctx, cfg)?; - let signer = get_signer_for_first_supported_auth_scheme(&auth_schemes)?; - let identity = auth_scheme.resolve_identity(cfg)?; - resolve_and_apply_endpoint(ctx, cfg)?; - - interceptors.modify_before_signing(ctx)?; - interceptors.read_before_signing(ctx)?; - let (tx_req_mut, props) = ctx.tx_request_mut().expect("tx_request has been set"); - signer(tx_req_mut, &props)?; - interceptors.read_after_signing(ctx)?; - interceptors.modify_before_transmit(ctx)?; - interceptors.read_before_transmit(ctx)?; + interceptors.read_before_attempt(ctx, cfg)?; + + let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); + let endpoint_orchestrator = cfg + .get::>>() + .ok_or("missing endpoint orchestrator")?; + endpoint_orchestrator.resolve_and_apply_endpoint(tx_req_mut, cfg)?; + + interceptors.modify_before_signing(ctx, cfg)?; + interceptors.read_before_signing(ctx, cfg)?; + + let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); + let auth_orchestrator = cfg + .get::>>() + .ok_or("missing auth orchestrator")?; + auth_orchestrator.auth_request(tx_req_mut, cfg)?; + + interceptors.read_after_signing(ctx, cfg)?; + interceptors.modify_before_transmit(ctx, cfg)?; + interceptors.read_before_transmit(ctx, cfg)?; // The connection consumes the request but we need to keep a copy of it // within the interceptor context, so we clone it here. - let tx_req = try_clone_http_request(ctx.tx_request().expect("tx_request has been set")) - .expect("tx_request is cloneable"); - let connection = cfg - .get:: Box>>>() - .ok_or_else(|| ConnectorError)?; - let res = connection(tx_req).await?; + let res = { + let tx_req = ctx.tx_request_mut().expect("tx_request has been set"); + let connection = cfg + .get::>>() + .ok_or("missing connector")?; + connection.call(tx_req, cfg).await? + }; ctx.set_tx_response(res); - interceptors.read_after_transmit(ctx)?; - interceptors.modify_before_deserialization(ctx)?; - interceptors.read_before_deserialization(ctx)?; - - let (tx_res, _) = ctx.tx_response_mut().expect("tx_response has been set"); + interceptors.read_after_transmit(ctx, cfg)?; + interceptors.modify_before_deserialization(ctx, cfg)?; + interceptors.read_before_deserialization(ctx, cfg)?; + let tx_res = ctx.tx_response_mut().expect("tx_response has been set"); let response_deserializer = cfg - .get:: Box>>>() - .ok_or_else(|| CfgError)?; - let res = response_deserializer(tx_res).await?; + .get::>>>() + .ok_or("missing response deserializer")?; + let res = response_deserializer.deserialize_response(tx_res, cfg)?; ctx.set_modeled_response(res); - interceptors.read_after_deserialization(&ctx) + interceptors.read_after_deserialization(ctx, cfg)?; + + Ok(()) } ``` From 37fc69da44bc3c00aec8dcea34fb2684927ede68 Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Fri, 17 Mar 2023 10:16:41 -0500 Subject: [PATCH 5/9] update: changes section: update: FAQ update: set status to "implemented" --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 246 ++++++++++-------- 1 file changed, 131 insertions(+), 115 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index 877f411ebb..d3fe6bf655 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -1,6 +1,6 @@ # Smithy Orchestrator -> status: RFC +> status: implemented > applies-to: The smithy client This RFC proposes a new process for constructing client requests and handling service responses. This new process is intended to: @@ -24,11 +24,11 @@ This RFC references but is not the source of truth on: When a smithy client communicates with a smithy service, messages are handled by an "orchestrator." The orchestrator runs in two main phases: 1. Constructing configuration. - - This process is user-configurable with "runtime plugins." - - Configuration is stored in a typemap. + - This process is user-configurable with "runtime plugins." + - Configuration is stored in a typemap. 1. Transforming a client request into a server response. - - This process is user-configurable with "interceptors." - - Interceptors are functions that are run by "hooks" in the request/response lifecycle. + - This process is user-configurable with "interceptors." + - Interceptors are functions that are run by "hooks" in the request/response lifecycle. ## Terminology @@ -42,8 +42,8 @@ When a smithy client communicates with a smithy service, messages are handled by - **The request/response lifecycle**: The process by which an *SDK client* makes requests and receives responses from a *remote service*. This process is enacted and managed by the *orchestrator*. - **Orchestrator**: The code within an *SDK client* that handles the process of making requests and receiving responses from *remote services*. The orchestrator is configurable by modifying the *runtime plugins* it's built from. The orchestrator is responsible for calling *interceptors* at the appropriate times in the *request/response lifecycle*. - **Interceptor**/**Hook**: A generic extension point within the *orchestrator*. Supports "anything that someone should be able to do", NOT "anything anyone might want to do". These hooks are: - - Either **read-only** or **read/write**. - - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. + - Either **read-only** or **read/write**. + - Able to read and modify the **Input**, **Transport Request**, **Transport Response**, or **Output** messages. - **Runtime Plugin**: Runtime plugins are similar to interceptors, but they act on configuration instead of requests and response. Both users and services may define runtime plugins. Smithy also defines several default runtime plugins used by most clients. See the F.A.Q. for a list of plugins with descriptions. - **ConfigBag**: A `typemap` that's equivalent to [`http::Extensions`](https://docs.rs/http/latest/http/struct.Extensions.html). Used to store configuration for the orchestrator. @@ -55,14 +55,14 @@ For many users, the changes described by this RFC will be invisible. Making a re let sdk_config = aws_config::load_from_env().await; let client = aws_sdk_s3::Client::new(&sdk_config); let res = client.get_object() - .bucket("a-bucket") - .key("a-file.txt") - .send() - .await?; + .bucket("a-bucket") + .key("a-file.txt") + .send() + .await?; match res { - Ok(res) => println!("success: {:?}"), - Err(err) => eprintln!("failure: {:?}") + Ok(res) => println!("success: {:?}"), + Err(err) => eprintln!("failure: {:?}") }; ``` @@ -83,27 +83,27 @@ Interceptors are similar to middlewares, in that they are functions that can rea #### Currently supported hooks 1. **Read Before Execution *(Read-Only)***: Before anything happens. This is the first -thing the SDK calls during operation execution. + thing the SDK calls during operation execution. 1. **Modify Before Serialization *(Read/Write)***: Before the input message given by -the customer is marshalled into a transport request message. Allows modifying the -input message. + the customer is marshalled into a transport request message. Allows modifying the + input message. 1. **Read Before Serialization *(Read-Only)***: The last thing the SDK calls before -marshaling the input message into a transport message. + marshaling the input message into a transport message. 1. **Read After Serialization *(Read-Only)***: The first thing the SDK calls after marshaling the input message into a transport message. 1. *(Retry Loop)* - 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. - 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. - 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. - 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. - 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. - 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. - 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. - 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. - 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. - 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. - 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. - 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). - 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Retry Loop *(Read/Write)***: The last thing the SDK calls before entering the retry look. Allows modifying the transport message. + 1. **Read Before Attempt *(Read-Only)***: The first thing the SDK calls “inside” of the retry loop. + 1. **Modify Before Signing *(Read/Write)***: Before the transport request message is signed. Allows modifying the transport message. + 1. **Read Before Signing *(Read-Only)***: The last thing the SDK calls before signing the transport request message. + 1. **Read After Signing (Read-Only)****: The first thing the SDK calls after signing the transport request message. + 1. **Modify Before Transmit *(Read/Write)***: Before the transport request message is sent to the service. Allows modifying the transport message. + 1. **Read Before Transmit *(Read-Only)***: The last thing the SDK calls before sending the transport request message. + 1. **Read After Transmit *(Read-Only)***: The last thing the SDK calls after receiving the transport response message. + 1. **Modify Before Deserialization *(Read/Write)***: Before the transport response message is unmarshaled. Allows modifying the transport response message. + 1. **Read Before Deserialization *(Read-Only)***: The last thing the SDK calls before unmarshalling the transport response message into an output message. + 1. **Read After Deserialization *(Read-Only)***: The last thing the SDK calls after unmarshaling the transport response message into an output message. + 1. **Modify Before Attempt Completion *(Read/Write)***: Before the retry loop ends. Allows modifying the unmarshaled response (output message or error). + 1. **Read After Attempt *(Read-Only)***: The last thing the SDK calls “inside” of the retry loop. 1. **Modify Before Execution Completion *(Read/Write)***: Before the execution ends. Allows modifying the unmarshaled response (output message or error). 1. **Read After Execution *(Read-Only)***: After everything has happened. This is the last thing the SDK calls during operation execution. @@ -139,7 +139,7 @@ struct Signer; impl Signer { fn sign(headers: &http::HeaderMap, signing_name: &str, signing_region: &str) -> String { - todo!() + todo!() } } ``` @@ -149,21 +149,21 @@ scheme, identity, and signer for an operation, as well as signing the request ```rust pub trait AuthOrchestrator: Send + Sync + Debug { - fn auth_request(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; + fn auth_request(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; } // And it calls that `AuthOrchestrator` like so: fn invoke() { - // code omitted for brevity + // code omitted for brevity - // Get the request to be signed - let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); - // Fetch the auth orchestrator from the bag - let auth_orchestrator = cfg .get::>>() .ok_or("missing auth orchestrator")?; - // Auth the request - auth_orchestrator.auth_request(tx_req_mut, cfg)?; + // Get the request to be signed + let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); + // Fetch the auth orchestrator from the bag + let auth_orchestrator = cfg .get::>>() .ok_or("missing auth orchestrator")?; + // Auth the request + auth_orchestrator.auth_request(tx_req_mut, cfg)?; - // code omitted for brevity + // code omitted for brevity } ``` @@ -182,8 +182,8 @@ impl AuthOrchestrator for Sigv4AuthOrchestrator { let signature = signer.sign(headers, signing_name, signing_region); match cfg.get::() { Some(SignatureLocation::Query) => req.query.set("sig", signature), - Some(SignatureLocation::Header) => req.headers_mut().insert("sig", signature), - None => return Err(Error::MissingSignatureLocation), + Some(SignatureLocation::Header) => req.headers_mut().insert("sig", signature), + None => return Err(Error::MissingSignatureLocation), }; Ok(()) @@ -201,24 +201,24 @@ This intermediate code should be free from as much logic as possible. Whenever p ```rust let conf: ConfigBag = aws_config::from_env() - // Configuration can be common to all smithy clients - .with(RetryConfig::builder().disable_retries().build()) - // Or, protocol-specific - .with(HttpClient::builder().with_().build()) - // Or, AWS-specific - .with(Region::from("us-east-1")) - // Or, service-specific - .with(S3Config::builder().force_path_style(false).build()) - .await; +// Configuration can be common to all smithy clients +.with(RetryConfig::builder().disable_retries().build()) +// Or, protocol-specific +.with(HttpClient::builder().build()) +// Or, AWS-specific +.with(Region::from("us-east-1")) +// Or, service-specific +.with(S3Config::builder().force_path_style(false).build()) +.await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() - .customize() - // Configuration can be set on operations as well as clients - .with(HttpConfig::builder().conn(some_other_conn).build()) - .send() - .await; +.customize() +// Configuration can be set on operations as well as clients +.with(HttpConfig::builder().conn(some_other_conn).build()) +.send() +.await; ``` Setting configuration that will not be used wastes memory and can make debugging more difficult. Therefore, configuration defaults are only set when they're relevant. For example, if a smithy service doesn't support HTTP, then no HTTP client will be set. @@ -229,26 +229,26 @@ Configuration has precedence. Configuration set on an operation will override co ```rust let conf: ConfigBag = aws_config::from_env() - .with(SomeConfig::builder() - .option_a(1) - .option_b(2) - .option_c(3) - .build() - ) - .await; +.with(SomeConfig::builder() +.option_a(1) +.option_b(2) +.option_c(3) +.build() +) +.await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() - .customize() - .with(SomeConfig::builder() - .option_a(0) - .option_b(Value::Inherit) - .option_c(Value::Unset) - .build() - ) - .send() - .await; +.customize() +.with(SomeConfig::builder() +.option_a(0) +.option_b(Value::Inherit) +.option_c(Value::Unset) +.build() +) +.send() +.await; ``` In the above example, when the `option_a`, `option_b`, `option_c`, values of `SomeConfig` are accessed, they'll return: @@ -267,19 +267,19 @@ Builders are defined like this: ```rust struct SomeBuilder { - value: Value, + value: Value, } impl struct SomeBuilder { - fn new() -> Self { - // By default, config values inherit from lower-layer configs - Self { value: Value::Inherit } - } - - fn some_field(&mut self, value: impl Into>) -> &mut self { - self.value = value.into(); - self - } + fn new() -> Self { + // By default, config values inherit from lower-layer configs + Self { value: Value::Inherit } + } + + fn some_field(&mut self, value: impl Into>) -> &mut self { + self.value = value.into(); + self + } } ``` @@ -310,7 +310,7 @@ Configuration is resolved in a fixed manner by reading the "lowest level" of con ### The `aws-smithy-orchestrator` crate -*The error handling in this example is simplified to use `BoxError`. When fully implemented, this will likely use `SdkError` instead.* +*I've omitted some of the error conversion to shorten this example and make it easier to understand. The real version will be messier.* ```rust /// `In`: The input message e.g. `ListObjectsRequest` @@ -325,12 +325,12 @@ pub async fn invoke( runtime_plugins: &RuntimePlugins, cfg: &mut ConfigBag, ) -> Result -where - // The input must be Clone in case of retries - In: Clone + 'static, - Req: 'static, - Res: 'static, - T: 'static, + where + // The input must be Clone in case of retries + In: Clone + 'static, + Req: 'static, + Res: 'static, + T: 'static, { let mut ctx: InterceptorContext> = InterceptorContext::new(input); @@ -390,11 +390,11 @@ async fn make_an_attempt( cfg: &mut ConfigBag, interceptors: &mut Interceptors>, ) -> Result<(), BoxError> -where - In: Clone + 'static, - Req: 'static, - Res: 'static, - T: 'static, + where + In: Clone + 'static, + Req: 'static, + Res: 'static, + T: 'static, { interceptors.read_before_attempt(ctx, cfg)?; @@ -449,9 +449,6 @@ where At various points in the execution of `invoke`, trait objects are fetched from the `ConfigBag`. These are preliminary definitions of those traits: ```rust -pub type BoxError = Box; -pub type BoxFallibleFut = Pin>>>; - pub trait TraceProbe: Send + Sync + Debug { fn dispatch_events(&self, cfg: &ConfigBag) -> BoxFallibleFut<()>; } @@ -476,33 +473,52 @@ pub trait AuthOrchestrator: Send + Sync + Debug { fn auth_request(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; } -pub trait EndpointOrchestrator: Send + Sync { +pub trait EndpointOrchestrator: Send + Sync + Debug { fn resolve_and_apply_endpoint(&self, req: &mut Req, cfg: &ConfigBag) -> Result<(), BoxError>; + fn resolve_auth_schemes(&self) -> Result, BoxError>; } ``` ## F.A.Q. - The orchestrator is a large and complex feature, with many moving parts. How can we ensure that multiple people can contribute in parallel? - - + - By defining the entire orchestrator and agreeing on its structure, we can then move on to working on individual runtime plugins and interceptors. - What is the precedence of interceptors? - - The precedence of interceptors is as follows: - - Interceptors registered via Smithy default plugins. - - *(AWS Services only)* Interceptors registered via AWS default plugins. - - Interceptors registered via service-customization plugins. - - Interceptors registered via client-level plugins. - - Interceptors registered via client-level configuration. - - Interceptors registered via operation-level plugins. - - Interceptors registered via operation-level configuration. + - The precedence of interceptors is as follows: + - Interceptors registered via Smithy default plugins. + - *(AWS Services only)* Interceptors registered via AWS default plugins. + - Interceptors registered via service-customization plugins. + - Interceptors registered via client-level plugins. + - Interceptors registered via client-level configuration. + - Interceptors registered via operation-level plugins. + - Interceptors registered via operation-level configuration. - What runtime plugins will be defined in `smithy-rs`? - - `RetryStrategy`: Configures how requests are retried. - - `TraceProbes`: Configures locations to which SDK metrics are published. - - `EndpointProviders`: Configures which hostname an SDK will call when making a request. - - `HTTPClients`: Configures how remote services are called. - - `IdentityProviders`: Configures how customers identify themselves to remote services. - - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Configures how customers authenticate themselves to remote services. - - `Checksum Algorithms`: Configures how an SDK calculates request and response checksums. + - `RetryStrategy`: Configures how requests are retried. + - `TraceProbes`: Configures locations to which SDK metrics are published. + - `EndpointProviders`: Configures which hostname an SDK will call when making a request. + - `HTTPClients`: Configures how remote services are called. + - `IdentityProviders`: Configures how customers identify themselves to remote services. + - `HTTPAuthSchemes` & `AuthSchemeResolvers`: Configures how customers authenticate themselves to remote services. + - `Checksum Algorithms`: Configures how an SDK calculates request and response checksums. ## Changes checklist -TODO +- [x] Create a new `aws-smithy-runtime` crate. + - Add orchestrator implementation + - Define the orchestrator/runtime plugin interface traits + - `TraceProbe` + - `RequestSerializer` + - `ResponseDeserializer` + - `Connection` + - `RetryStrategy` + - `AuthOrchestrator` + - `EndpointOrchestrator` +- [x] Create a new `aws-smithy-runtime-api` crate. + - Add `ConfigBag` module + - Add `retries` module + - Add `rate_limiting` sub-module + - Add `interceptors` module + - `Interceptor` trait + - `InterceptorContext` impl + - Add `runtime_plugins` module +- [x] Create a new integration test that ensures the orchestrator works. From c4f034df9d30c93ea22a11ef984fc1fbf0bc092d Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Fri, 17 Mar 2023 10:30:10 -0500 Subject: [PATCH 6/9] fix: code example indentation --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index d3fe6bf655..83347e020b 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -201,24 +201,24 @@ This intermediate code should be free from as much logic as possible. Whenever p ```rust let conf: ConfigBag = aws_config::from_env() -// Configuration can be common to all smithy clients -.with(RetryConfig::builder().disable_retries().build()) -// Or, protocol-specific -.with(HttpClient::builder().build()) -// Or, AWS-specific -.with(Region::from("us-east-1")) -// Or, service-specific -.with(S3Config::builder().force_path_style(false).build()) -.await; + // Configuration can be common to all smithy clients + .with(RetryConfig::builder().disable_retries().build()) + // Or, protocol-specific + .with(HttpClient::builder().build()) + // Or, AWS-specific + .with(Region::from("us-east-1")) + // Or, service-specific + .with(S3Config::builder().force_path_style(false).build()) + .await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() -.customize() -// Configuration can be set on operations as well as clients -.with(HttpConfig::builder().conn(some_other_conn).build()) -.send() -.await; + .customize() + // Configuration can be set on operations as well as clients + .with(HttpConfig::builder().conn(some_other_conn).build()) + .send() + .await; ``` Setting configuration that will not be used wastes memory and can make debugging more difficult. Therefore, configuration defaults are only set when they're relevant. For example, if a smithy service doesn't support HTTP, then no HTTP client will be set. @@ -229,26 +229,26 @@ Configuration has precedence. Configuration set on an operation will override co ```rust let conf: ConfigBag = aws_config::from_env() -.with(SomeConfig::builder() -.option_a(1) -.option_b(2) -.option_c(3) -.build() -) -.await; + .with(SomeConfig::builder() + .option_a(1) + .option_b(2) + .option_c(3) + .build() + ) + .await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() -.customize() -.with(SomeConfig::builder() -.option_a(0) -.option_b(Value::Inherit) -.option_c(Value::Unset) -.build() -) -.send() -.await; + .customize() + .with(SomeConfig::builder() + .option_a(0) + .option_b(Value::Inherit) + .option_c(Value::Unset) + .build() + ) + .send() + .await; ``` In the above example, when the `option_a`, `option_b`, `option_c`, values of `SomeConfig` are accessed, they'll return: From 256d753e29da4c4dc3f0e1338739edfc764b48c9 Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Fri, 17 Mar 2023 10:32:16 -0500 Subject: [PATCH 7/9] no really, fix the indents --- .../src/rfcs/rfc0034_smithy_orchestrator.md | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index 83347e020b..942f6c4924 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -229,24 +229,26 @@ Configuration has precedence. Configuration set on an operation will override co ```rust let conf: ConfigBag = aws_config::from_env() - .with(SomeConfig::builder() - .option_a(1) - .option_b(2) - .option_c(3) - .build() + .with( + SomeConfig::builder() + .option_a(1) + .option_b(2) + .option_c(3) ) + .build() .await; let client = aws_sdk_s3::Client::new(&conf); client.list_buckets() .customize() - .with(SomeConfig::builder() - .option_a(0) - .option_b(Value::Inherit) - .option_c(Value::Unset) - .build() + .with( + SomeConfig::builder() + .option_a(0) + .option_b(Value::Inherit) + .option_c(Value::Unset) ) + .build() .send() .await; ``` From d6a987c1cf181f6177de998295c0e9bcf4bd1ec9 Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Fri, 17 Mar 2023 10:33:38 -0500 Subject: [PATCH 8/9] I keep seeing more... --- design/src/rfcs/rfc0034_smithy_orchestrator.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index 942f6c4924..a1b7e3f4db 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -274,13 +274,13 @@ struct SomeBuilder { impl struct SomeBuilder { fn new() -> Self { - // By default, config values inherit from lower-layer configs - Self { value: Value::Inherit } + // By default, config values inherit from lower-layer configs + Self { value: Value::Inherit } } fn some_field(&mut self, value: impl Into>) -> &mut self { - self.value = value.into(); - self + self.value = value.into(); + self } } ``` From e5e36962edfedeb8fc80c3ef6810a366abdd2d0a Mon Sep 17 00:00:00 2001 From: Zelda Hessler Date: Fri, 17 Mar 2023 10:38:34 -0500 Subject: [PATCH 9/9] Thanks Yuki --- design/src/rfcs/rfc0034_smithy_orchestrator.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/design/src/rfcs/rfc0034_smithy_orchestrator.md b/design/src/rfcs/rfc0034_smithy_orchestrator.md index a1b7e3f4db..84d5952253 100644 --- a/design/src/rfcs/rfc0034_smithy_orchestrator.md +++ b/design/src/rfcs/rfc0034_smithy_orchestrator.md @@ -159,7 +159,9 @@ fn invoke() { // Get the request to be signed let tx_req_mut = ctx.tx_request_mut().expect("tx_request has been set"); // Fetch the auth orchestrator from the bag - let auth_orchestrator = cfg .get::>>() .ok_or("missing auth orchestrator")?; + let auth_orchestrator = cfg + .get::>>() + .ok_or("missing auth orchestrator")?; // Auth the request auth_orchestrator.auth_request(tx_req_mut, cfg)?;