diff --git a/_data/kroxylicious.yml b/_data/kroxylicious.yml index 39a751c..7754d8f 100644 --- a/_data/kroxylicious.yml +++ b/_data/kroxylicious.yml @@ -1,6 +1,8 @@ versions: - title: 'Development' url: '/kroxylicious' + - title: 'v0.7.0' + url: '/docs/v0.7.0/' - title: 'v0.6.0' url: '/docs/v0.6.0/' - title: 'v0.5.1' diff --git a/docs/v0.7.0/_files/_assets/attributes.adoc b/docs/v0.7.0/_files/_assets/attributes.adoc new file mode 100644 index 0000000..accfbd7 --- /dev/null +++ b/docs/v0.7.0/_files/_assets/attributes.adoc @@ -0,0 +1,44 @@ +// AsciiDoc settings +:data-uri!: +:doctype: book +:experimental: +:idprefix: +:imagesdir: images +:numbered: +:sectanchors!: +:sectnums: +:source-highlighter: highlight.js +:toc: left +:linkattrs: +:toclevels: 2 +:icons: font + +//Latest version +:ProductVersion: 0.7 +:gitRef: releases/tag/v0.7.0 +:ApicurioVersion: 2.6.x + +//Proxy links +:github: https://github.com/kroxylicious/kroxylicious +:github-releases: https://github.com/kroxylicious/kroxylicious/{gitRef} +:github-issues: https://github.com/kroxylicious/kroxylicious/issues[Kroxylicious issues^] +:api-javadoc: https://javadoc.io/doc/io.kroxylicious/kroxylicious-api/{ProductVersion} +:kms-api-javadoc: https://javadoc.io/doc/io.kroxylicious/kroxylicious-kms/{ProductVersion} +:encryption-api-javadoc: https://javadoc.io/doc/io.kroxylicious/kroxylicious-encryption/{ProductVersion} +:start-script: https://github.com/kroxylicious/kroxylicious/blob/{gitRef}/kroxylicious-app/src/assembly/kroxylicious-start.sh + +//Kafka links +:ApacheKafkaSite: https://kafka.apache.org[Apache Kafka website^] +:kafka-protocol: https://kafka.apache.org/protocol.html + +//java links +:java-17-javadoc: https://docs.oracle.com/en/java/javase/17/docs/api + +//Vault links +:hashicorp-vault: https://developer.hashicorp.com/vault + +//AWS links +:aws: https://docs.aws.amazon.com/ + +// Apicurio links +:apicurio-docs: https://www.apicur.io/registry/docs/apicurio-registry/{ApicurioVersion}/ diff --git a/docs/v0.7.0/_files/_assets/trademarks.adoc b/docs/v0.7.0/_files/_assets/trademarks.adoc new file mode 100644 index 0000000..8e0c362 --- /dev/null +++ b/docs/v0.7.0/_files/_assets/trademarks.adoc @@ -0,0 +1,5 @@ += Trademark notice + +* Hashicorp Vault is a registered trademark of HashiCorp, Inc. +* AWS Key Management Service is a trademark of Amazon.com, Inc. or its affiliates. +* Apache Kafka is a registered trademark of The Apache Software Foundation. \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-aws-kms.adoc b/docs/v0.7.0/_files/assemblies/assembly-aws-kms.adoc new file mode 100644 index 0000000..2ad30b4 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-aws-kms.adoc @@ -0,0 +1,19 @@ +// file included in the following: +// +// assembly-record-encryption-filter.adoc + +[id='assembly-aws-kms-{context}'] += Setting up AWS KMS + +[role="_abstract"] +To use {aws}/kms/latest/developerguide/overview.html[AWS Key Management Service] with the Record Encryption filter, use the following setup: + +* Establish an AWS KMS aliasing convention for keys +* Configure the AWS KMS +* Create AWS KMS keys + +You'll need a privileged AWS user that is capable of creating users and policies to perform the set-up. + +include::../modules/record-encryption/aws-kms/con-aws-kms-setup.adoc[leveloffset=+1] +include::../modules/record-encryption/aws-kms/con-aws-kms-service-config.adoc[leveloffset=+1] +include::../modules/record-encryption/aws-kms/con-aws-kms-key-creation.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-built-in-filters.adoc b/docs/v0.7.0/_files/assemblies/assembly-built-in-filters.adoc new file mode 100644 index 0000000..f38afa3 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-built-in-filters.adoc @@ -0,0 +1,14 @@ +// file included in the following: +// +// index.adoc + +[id='assembly-built-in-filters-{context}'] += Built-in filters + +[role="_abstract"] +Kroxylicious comes with a suite of built-in filters designed to enhance the functionality and security of your Kafka clusters. + +include::assembly-record-encryption-filter.adoc[leveloffset=+1] +include::assembly-multi-tenancy-filter.adoc[leveloffset=+1] +include::assembly-record-validation-filter.adoc[leveloffset=+1] +include::../modules/oauthbearer/con-oauthbearer.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-hashicorp-vault.adoc b/docs/v0.7.0/_files/assemblies/assembly-hashicorp-vault.adoc new file mode 100644 index 0000000..1971982 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-hashicorp-vault.adoc @@ -0,0 +1,17 @@ +// file included in the following: +// +// assembly-record-encryption-filter.adoc + +[id='assembly-hashicorp-vault-{context}'] += Setting up HashiCorp Vault + +[role="_abstract"] +To use HashiCorp Vault with the Record Encryption filter, use the following setup: + +* Enable the Transit Engine as the Record Encryption filter relies on its APIs. +* Create a Vault policy specifically for the filter with permissions for generating and decrypting Data Encryption Keys (DEKs) for envelope encryption. +* Obtain a Vault token that includes the filter policy. + +include::../modules/record-encryption/hashicorp-vault/con-vault-setup.adoc[leveloffset=+1] +include::../modules/record-encryption/hashicorp-vault/con-vault-service-config.adoc[leveloffset=+1] +include::../modules/record-encryption/hashicorp-vault/con-vault-key-creation.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-multi-tenancy-filter.adoc b/docs/v0.7.0/_files/assemblies/assembly-multi-tenancy-filter.adoc new file mode 100644 index 0000000..9ab6984 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-multi-tenancy-filter.adoc @@ -0,0 +1,31 @@ +// file included in the following: +// +// assembly-built-in-filters.adoc + +[id='assembly-multi-tenancy-filter-{context}'] += (Preview) Multi-tenancy filter + +[role="_abstract"] +Kroxylicious’s Multi-tenancy filter presents a single Kafka cluster to tenants as if it were multiple clusters. +Operations are isolated to a single tenant by prefixing resources with an identifier. + +NOTE: This filter is currently in incubation and available as a preview. +We would not recommend using it in a production environment. + +The Multi-tenancy filter works by intercepting all Kafka RPCs (remote procedure calls) that reference resources, such as topic names and consumer group names: + +Request path:: On the request path, resource names are prefixed with a tenant identifier. +Response path:: On the response path, the prefix is removed. + +Kafka RPCs that list resources are filtered so that only resources belonging to the tenant are returned, effectively creating a private cluster experience for each tenant. + +To set up the filter, configure it in Kroxylicious. + +IMPORTANT: While the Multi-tenancy filter isolates operations on resources, it does not isolate user identities across tenants. +User authentication and ACLs (Access Control Lists) are shared across all tenants, meaning that identity is not scoped to individual tenants. +For more information on open issues related to this filter, see {github-issues}. + +NOTE: For more information on Kafka's support for multi-tenancy, see the {ApacheKafkaSite}. + +//configuring the multi-tenancy filter +include::../modules/multi-tenancy/proc-multi-tenancy.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-overview.adoc b/docs/v0.7.0/_files/assemblies/assembly-overview.adoc new file mode 100644 index 0000000..85dc6e4 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-overview.adoc @@ -0,0 +1,24 @@ +// file included in the following: +// +// index.adoc + +[id='assembly-overview-{context}'] += Kroxylicious overview + +[role="_abstract"] +Kroxylicious is an Apache Kafka protocol-aware ("Layer 7") proxy designed to enhance Kafka-based systems. +Through its filter mechanism it allows additional behavior to be introduced into a Kafka-based system without requiring changes to either your applications or the Kafka cluster itself. +Built-in filters are provided as part of the solution. + +Functioning as an intermediary, the Kroxylicious mediates communication between a Kafka cluster and its clients. +It takes on the responsibility of receiving, filtering, and forwarding messages. + +An API provides a convenient means for implementing custom logic within the proxy. + +[role="_additional-resources"] +.Additional resources + +* {ApacheKafkaSite} + +//broker config (upstream) +include::../modules/con-proxy-overview.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-record-encryption-filter.adoc b/docs/v0.7.0/_files/assemblies/assembly-record-encryption-filter.adoc new file mode 100644 index 0000000..a1c3872 --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-record-encryption-filter.adoc @@ -0,0 +1,32 @@ +// file included in the following: +// +// assembly-built-in-filters.adoc + +[id='assembly-record-encryption-filter-{context}'] += Record Encryption filter + +[role="_abstract"] +Kroxylicious's Record Encryption filter enhances the security of Kafka messages. +The filter uses industry-standard cryptographic techniques to apply encryption to Kafka messages, ensuring the confidentiality of data stored in the Kafka Cluster. +Kroxylicious centralizes topic-level encryption, ensuring streamlined encryption across Kafka clusters. + +There are three steps to using the filter: + +1. Setting up a Key Management System (KMS). +2. Establishing the encryption keys within the KMS that will be used to encrypt the topics. +3. Configuring the filter within Kroxylicious. + +The filter integrates with a Key Management Service (KMS), which has ultimate responsibility for the safe storage of sensitive key material. +The filter relies on a KMS implementation. +Currently, Kroxylicious integrates with either HashiCorp Vault or AWS Key Management Service. +You can provide implementations for your specific KMS systems. +Additional KMS support will be added based on demand. + +//overview of the record encryption process +include::../modules/record-encryption/con-record-encryption-overview.adoc[leveloffset=+1] +//setting up hashicorp vault +include::assembly-hashicorp-vault.adoc[leveloffset=+1] +//setting up AWS KMS +include::assembly-aws-kms.adoc[leveloffset=+1] +//configuring the record encryption filter +include::../modules/record-encryption/proc-configuring-record-encryption-filter.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/assemblies/assembly-record-validation-filter.adoc b/docs/v0.7.0/_files/assemblies/assembly-record-validation-filter.adoc new file mode 100644 index 0000000..61ec29a --- /dev/null +++ b/docs/v0.7.0/_files/assemblies/assembly-record-validation-filter.adoc @@ -0,0 +1,27 @@ +// file included in the following: +// +// assembly-built-in-filters.adoc + +[id='assembly-record-validation-filter-{context}'] += (Preview) Record Validation + +[role="_abstract"] +The Record Validation filter validates records sent by a producer. +Only records that pass the validation are sent to the broker. +This filter can be used to prevent _poison messages_—such as those containing corrupted data or invalid formats—from entering the Kafka system, which may otherwise lead to consumer failure. + +The filter currently supports two modes of operation: + +1. Schema Validation ensures the content of the record conforms to a schema stored in an https://www.apicur.io/registry/[Apicurio Registry]. +2. JSON Syntax Validation ensures the content of the record contains syntactically valid JSON. + +Validation rules can be applied to check the content of the Kafka record key or value. + +If the validation fails, the product request is rejected and the producing application receives an error response. The broker +will not receive the rejected records. + +NOTE: This filter is currently in incubation and available as a preview. +We would not recommend using it in a production environment. + +//configuring the record-validation filter +include::../modules/record-validation/proc-record-validation.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/index.adoc b/docs/v0.7.0/_files/index.adoc new file mode 100644 index 0000000..a305c9c --- /dev/null +++ b/docs/v0.7.0/_files/index.adoc @@ -0,0 +1,24 @@ +:experimental: +include::_assets/attributes.adoc[] + +:context: proxy + +[id="using-book-{context}"] += Kroxylicious + +include::assemblies/assembly-overview.adoc[leveloffset=+1] + +//built-in filters +include::assemblies/assembly-built-in-filters.adoc[leveloffset=+1] + +//community filters +include::modules/con-community-filters.adoc[leveloffset=+1] + +//custom filters +include::modules/con-custom-filters.adoc[leveloffset=+1] + +include::modules/con-deploying.adoc[leveloffset=+1] +include::modules/con-operating.adoc[leveloffset=+1] + +//trademark notices +include::_assets/trademarks.adoc[leveloffset=+1] \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/con-community-filters.adoc b/docs/v0.7.0/_files/modules/con-community-filters.adoc new file mode 100644 index 0000000..d0912a9 --- /dev/null +++ b/docs/v0.7.0/_files/modules/con-community-filters.adoc @@ -0,0 +1,13 @@ +// file included in the following: +// +// index.adoc + +[id='con-community-filters-{context}'] += Community filters + +[role="_abstract"] +Community contributed filters are showcased in the +https://github.com/kroxylicious/kroxylicious-community-gallery[Community Gallery^]. + +NOTE: These filters are contributed by the community and are not managed or maintained by the Kroxylicious team. +Use them at your own risk. \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/con-custom-filters.adoc b/docs/v0.7.0/_files/modules/con-custom-filters.adoc new file mode 100644 index 0000000..08fea2c --- /dev/null +++ b/docs/v0.7.0/_files/modules/con-custom-filters.adoc @@ -0,0 +1,487 @@ +// Assembly included in the following: +// +// index.adoc + +[id='con-custom-filters-{context}'] += Custom filters + +[role="_abstract"] +Custom filters can be written in the Java programming language. +Kroxylicious supports Java 17. +Knowledge of the {kafka-protocol}[Kafka protocol^] is generally required to write a protocol filter. + +There is currently one class of Custom Filters users can implement: + +<>:: Allow customisation of how protocol messages are handled on their way to, or from, the Cluster. + +The following sections explain in more detail how to write your own filters. + +== Sample Custom Filter Project + +A collection of sample filters is available within the Kroxylicious repository for you to download, try out, and customise. +You can find them {github}/tree/main/kroxylicious-sample[here] for a hands-on introduction to creating your own custom filters. + +== API docs + +Custom filters are built by implementing interfaces supplied by the +{github}/tree/main/api/kroxylicious-api[kroxylicious-api^] module +(https://mvnrepository.com/artifact/io.kroxylicious/kroxylicious-api[io.kroxylicious:kroxylicious-api] on +maven central). You can view the javadoc {api-javadoc}/io/kroxylicious/proxy/filter/package-summary.html[here^]. + +== Dependencies + +How filter classes are loaded is not currently defined by the filter contract. +In other words, filters might be loaded using a classloader-per-filter model, +or using a single class loader. +This doesn't really make a difference to filter authors except where they want to make use of libraries as dependencies. +Because those dependencies might be loaded by the same classloader as the dependencies of other filters there is the possibility of collision. Filter A and Filter B might both want to use Library C, and they might want to use different versions of Library C. + +For common things like logging and metric facade APIs it is recommended to use the facade APIs which are also used by the proxy core. + +// TODO Maven dependency +// TODO Gradle dependency + +// TODO recommend BOM usage + +== Protocol filters + +A protocol filter is a `public` top-level, concrete class with a particular public constructor and which implements +one or more protocol filter interfaces. You can implement two distinct types of Custom Protocol Filter: + +- <> +- <> + +Note that these types are mutually exclusive, for example a Filter is not allowed to implement both `RequestFilter` and +`MetadataRequestFilter`. This is to prevent ambiguity. If we received a `MetadataRequest`, would it be dispatched to +the `onMetadataRequest(..)` method of `MetadataRequestFilter` or the `onRequest` method of `RequestFilter`, or both? +Instead, we disallow these combinations, throwing an exception at runtime if your Filter implements incompatible interfaces. + +=== Specific Message Protocol Filters + +A filter may wish to intercept specific types of Kafka messages. For example, intercept all Produce Requests, or +intercept all Fetch Responses. To support this case Kroxylicious provides an interfaces for all request types and +response types supported by Kafka (at the version of Kafka Kroxylicious depends on). A filter implementation can +implement any combination of these interfaces. + +There is no requirement that a Filter handles both the request and response halves of an RPC. A Filter can choose to +intercept only the request, or only the response, or both the request and response. + +==== Examples + +To intercept all Fetch Requests your class would implement +{api-javadoc}/io/kroxylicious/proxy/filter/FetchRequestFilter.html[FetchRequestFilter^]: + +[source,java] +---- +public class FetchRequestClientIdFilter implements FetchRequestFilter { + + @Override + public CompletionStage onFetchRequest(short apiVersion, + RequestHeaderData header, + FetchRequestData request, + FilterContext context) { + header.setClientId("fetch-client!"); + return context.forwardRequest(header, request); + } +} +---- + +To intercept all Fetch Responses your class would implement +{api-javadoc}/io/kroxylicious/proxy/filter/FetchResponseFilter.html[FetchResponseFilter^]: + +[source,java] +---- +public class FetchRequestClientIdFilter implements FetchResponseFilter { + + @Override + public CompletionStage onFetchResponse(short apiVersion, + ResponseHeaderData header, + FetchResponseData response, + FilterContext context) { + mutateResponse(response); + return context.forwardResponse(header, response); + } +} +---- + +To intercept all Fetch Requests and all Fetch Responses your class would implement +{api-javadoc}/io/kroxylicious/proxy/filter/FetchRequestFilter.html[FetchRequestFilter^] and +{api-javadoc}/io/kroxylicious/proxy/filter/FetchResponseFilter.html[FetchResponseFilter^]: + +[source,java] +---- +public class FetchRequestClientIdFilter implements FetchRequestFilter, FetchResponseFilter { + + @Override + public CompletionStage onFetchRequest(short apiVersion, + RequestHeaderData header, + FetchRequestData request, + FilterContext context) { + header.setClientId("fetch-client!"); + return context.forwardRequest(header, request); + } + + @Override + public CompletionStage onFetchResponse(short apiVersion, + ResponseHeaderData header, + FetchResponseData response, + FilterContext context) { + mutateResponse(response); + return context.forwardResponse(header, response); + } +} +---- +Specific Message Filter interfaces are mutually exclusive with <>. +Kroxylicious will reject invalid combinations of interfaces. + +=== Request/Response Protocol Filters + +A filter may wish to intercept every message being sent from the Client to the Cluster or from the Cluster +to the Client. To do this your custom filter will implement: + +- {api-javadoc}/io/kroxylicious/proxy/filter/RequestFilter.html[RequestFilter^] +to intercept all requests. +- {api-javadoc}/io/kroxylicious/proxy/filter/ResponseFilter.html[ResponseFilter^] +to intercept all responses. + +Custom filters are free to implement either interface or both interfaces to intercept all messages. + +For example: + +[source,java] +---- +public class FixedClientIdFilter implements RequestFilter { + + @Override + public CompletionStage onRequest(ApiKeys apiKey, + RequestHeaderData header, + ApiMessage body, + FilterContext filterContext) { + header.setClientId("example!"); + return filterContext.forwardRequest(header, body); + } + +} +---- + +Request/Response Filter interfaces are mutually exclusive with <> interfaces. +Kroxylicious will reject invalid combinations of interfaces. + +=== The Filter Result + +As seen above, filter methods (`onXyz[Request|Response]`) must return a `CompletionStage` object. +It is the job of `FilterResult` to convey what message is to forwarded to the next filter in the chain (or broker +/client if at the chain's beginning or end). It is also used to carry instructions such as indicating that the +connection must be closed, or a message dropped. + +If the filter returns a `CompletionStage` that is already completed normally, Kroxylicious will immediately perform +the action described by the `FilterResult`. + +The filter may return a `CompletionStage` that is not yet completed. When this happens, Kroxylicious will pause +reading from the downstream (the Client writes will eventually block), and it begins to queue up in-flight +requests/responses arriving at the filter. This is done so that message order is maintained. Once the +`CompletionStage` completes, the action described by the `FilterResult` is performed, reading from the downstream +resumes and any queued up requests/responses are processed. + +IMPORTANT: The pausing of reads from the downstream is a relatively costly operation. To maintain optimal performance +filter implementations should minimise the occasions on which an incomplete `CompletionStage` is returned. + +If the `CompletionStage` completes exceptionally, the connection is closed. This also applies if the +`CompletionStage` does not complete within a timeout (20000 milliseconds). + +==== Creating a Filter Result +The `FilterContext` is the factory for the `FilterResult` objects. + +There are two convenience methods{empty}footnote:[The `context.forward*()` methods behave exactly as the builder form +`.forward(header, message).complete()`] that simply allow a filter to forward a result to the next filter. +We've already seen these in action above. + +* `context.forwardRequest(header, request)` used by result filter to forward a request. +* `context.forwardResponse(header, response)` used by result filter to forward a request. + +To access richer features, use the filter result builders `context.requestFilterResultBuilder()` and +`responseFilterResultBuilder()`. + +Filter result builders allow you to: + +1. forward a request/response: `.forward(header, request)`. +2. signal that a connection is to be closed: `.withCloseConnection()`. +3. signal that a message is to be dropped (i.e. not forwarded): `.drop()`. +4. for requests only, send a short-circuit response: `.shortCircuitResponse(header, response)` + +The builder lets you combine legal behaviours together. For instance, to close the connection after forwarding +a response to a client, a response filter could use: + +[source,yaml] +---- +return context.responseFilterResultBuilder() + .forward(header, response) + .withCloseConnection() + .complete(); +---- + +The builders yield either a completed `CompletionStage` which can be returned directly from the +filter method, or bare `FilterResult`. The latter exists to support asynchronous programming styles allowing you +to use your own Futures. + +IMPORTANT: The `drop` behaviour can be legally used in very specific circumstances. The Kafka Protocol is, +for the most part, strictly request/response with responses expected in the order the request were sent. The client +will fail if the contract isn't upheld. The exception is `Produce` where `acks=0`. Filters may drop these requests without +introducing a protocol error. + +=== The protocol filter lifecycle + +Instances of the filter class are created on demand when a protocol message is first sent by a client. +Instances are specific to the channel between a single client and a single broker. + +It exists while the client remains connected. + +=== Handling state + +The simplest way of managing per-client state is to use member fields. +The proxy guarantees that all methods of a given filter instance will always be invoked on the same thread (also true of +the CompletionStage completion in the case of <>). +Therefore, there is no need to use synchronization when accessing such fields. + +See the {api-javadoc}/io/kroxylicious/proxy/filter/package-summary.html#implementing.threadSafety[`io.kroxylicious.proxy.filter`^] +package javadoc for more information on thread-safety. + +=== Filter Patterns + +Kroxylicious Protocol Filters support several patterns: + +1. <> +2. <> +3. <> +4. <> + +==== Intercepting Requests and Responses + +This is a common pattern, we want to inspect or modify a message. For example: + +[source,java] +---- +public class SampleFetchResponseFilter implements FetchResponseFilter { + @Override + public CompletionStage onFetchResponse(short apiVersion, + ResponseHeaderData header, + FetchResponseData response, + FilterContext context) { + mutateResponse(response, context); //<1> + return context.forwardResponse(header, response); //<2> + } +} +---- +<1> We mutate the response object. For example, you could alter the records that have been fetched. +<2> We forward the response, sending it towards the client, invoking Filters downstream of this one. + +NOTE: We can only forward the response and header objects passed into the `onFetchResponse`. New instances are not +supported. + +==== Sending Response messages from a Request Filter towards the Client (Short-circuit responses) + +In some cases we may wish to not forward a request from the client to the Cluster. Instead, we want to intercept that +request and generate a response message in a Kroxylicious Protocol Filter and send it towards the client. This is called +a short-circuit response. + +.Illustration of responding without proxying +[a2s, format="svg"] +.... +.----------------------------------------------------------------------------------------------------------------------. +| | +| '---------------------------------------------------------------' | +| |[Kroxylicious] | | +| | | | +| | '----------------------------------------------------' | '--------------------' | +| | |[Virtual Cluster] | | |[Cluster] | | +| '-------------' | | '----------' '----------' '----------' | | | '------------' | | +| |[Client] | | | |[Filter1] | |[Filter2] | |[Filter3] | | | | |[Broker] | | | +| | |======|===|==>| |====>| | | | | | | | | | | +| | | A | | | F(A)-->B | B | F(B)-->C | | | | | | | | | | +| | | | | | | | : | | | | | | | | | | +| | |<=====|===|===| |<====| : | | | | | | | | | | +| | | W | | | f(C)-->W | C | <======+ | | | | | | | | | | +| '-------------' | | '----------' '----------' '----------' | | | '------------' | | +| | | | | '--------------------' | +| | '----------------------------------------------------' | | +| | | | +| '---------------------------------------------------------------' | +| | +.----------------------------------------------------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +For example: + +[source,java] +---- +public class CreateTopicRejectFilter implements CreateTopicsRequestFilter { + + public CompletionStage onCreateTopicsRequest(short apiVersion, RequestHeaderData header, CreateTopicsRequestData request, + FilterContext context) { + CreateTopicsResponseData response = new CreateTopicsResponseData(); + CreateTopicsResponseData.CreatableTopicResultCollection topics = new CreateTopicsResponseData.CreatableTopicResultCollection(); // <1> + request.topics().forEach(creatableTopic -> { + CreateTopicsResponseData.CreatableTopicResult result = new CreateTopicsResponseData.CreatableTopicResult(); + result.setErrorCode(Errors.INVALID_TOPIC_EXCEPTION.code()).setErrorMessage(ERROR_MESSAGE); + result.setName(creatableTopic.name()); + topics.add(result); + }); + response.setTopics(topics); + return context.requestFilterResultBuilder().shortCircuitResponse(response).completed(); // <2> + } +} +---- +<1> Create a new instance of the corresponding response data and populate it. Note you may need to use the `apiVersion` +to check which fields can be set at this request's API version. +<2> We generate a short-circuit response that will send it towards the client, invoking Filters downstream of this one. + +This will respond to all Create Topic requests with an error response without forwarding any of those requests to the Cluster. + +===== Closing the connections + +There is a useful variation on the pattern above, where the filter needs, in addition to sending an error +response, also to cause the connection to close. This is useful in use-cases where the filter wishes to disallow +certain client behaviours. + +[source,java] +---- +public class DisallowAlterConfigs implements AlterConfigsRequestFilter { + + @Override + public CompletionStage onAlterConfigsRequest(short apiVersion, RequestHeaderData header, AlterConfigsRequestData request, + FilterContext context) { + var response = new AlterConfigsResponseData(); + response.setResponses(request.resources().stream() + .map(a -> new AlterConfigsResourceResponse() + .setErrorCode(Errors.INVALID_CONFIG.code()) + .setErrorMessage("This service does not allow this operation - closing connection")) + .toList()); + return context.requestFilterResultBuilder() + .shortCircuitResponse(response) + .withCloseConnection() // <1> + .completed(); + } +} +---- +<1> We enable the close connection option on the builder. This will cause Kroxylicious to close the connection +after the response is sent to the client. + +==== Sending asynchronous requests to the Cluster + +Filters can make additional asynchronous requests to the Cluster. This is useful if the Filter needs additional +information from the Cluster in order to know how to mutate the filtered request/response. + +The Filter can make use of {java-17-javadoc}/java.base/java/util/concurrent/CompletionStage.html[CompletionStage^] +chaining features ([`#thenApply()` etc.) to organise for actions to be done once the asynchronous request completes. +For example, it could chain an action that mutates the filtered request/response using the asynchronous response, and +finally, chain an action to forward the request/response to the next filter. + +The asynchronous request/response will be intercepted by Filters upstream of this Filter. Filters downstream of this +Filter (and the Client) do not see the asynchronous response. + +Let's take a look at an example. We'll send an asynchronous request towards the Cluster for topic metadata while +handling a FetchRequest and use the response to mutate the FetchRequest before passing it to the next filter in the chain. + +[source,java] +---- +public class FetchFilter implements FetchRequestFilter { + public static final short METADATA_VERSION_SUPPORTING_TOPIC_IDS = (short) 12; + + @Override + public CompletionStage onFetchRequest(ApiKeys apiKey, + RequestHeaderData header, + FetchRequestData request, + FilterContext context) { + var metadataRequestHeader = new RequestHeaderData().setRequestApiVersion(METADATA_VERSION_SUPPORTING_TOPIC_IDS); // <1> + var metadataRequest = new MetadataRequestData(); // <2> + var topic = new MetadataRequestData.MetadataRequestTopic(); + topic.setTopicId(Uuid.randomUuid()); + metadataRequest.topics().add(topic); + var stage = context.sendRequest(metadataRequestHeader, metadataRequest); // <3> + return stage.thenApply(metadataResponse -> mutateFetchRequest(metadataResponse, request)) // <4> + .thenCompose(mutatedFetchRequest -> context.forwardRequest(header, mutatedFetchRequest)); // <5> + } +} +---- +<1> We construct a header object for the asynchronous request. It is important to specify the API version of the request +that is to be used. The version chosen must be a version known to the Kafka Client used by Kroxylicious +and must be an API version supported by the Target Cluster. +<2> We construct a new request object. When constructing the request object, care needs to be taken to ensure the request is populated with the structure which matches the API version you have chosen. Refer to the {kafka-protocol}[Kafka Protocol Guide] for more details. +<3> We asynchronously send the request towards the Cluster and obtain a CompletionStage which will contain the response. +<4> We use a computation stage to mutate the filtered fetch request using the response from the request sent at <3>. +<5> We use another computation stage to forward the mutated request. + +As you have read above, we need to know the API version we want our request to be encoded at. Your filter can discover +what versions of an API the Kafka Cluster supports. To do this use the +{api-javadoc}/io/kroxylicious/proxy/ApiVersionsService.html[ApiVersionsService^] available from the `FilterContext` +to determine programmatically what versions of an API are support and then write code to make a suitable `request` +object. + +NOTE: Kroxylicious provides the guarantee that computation stages chained using the _default execution methods_ are +executed on the same thread as the rest of the Filter work, so we can safely mutate Filter members without synchronising. +See the {api-javadoc}/io/kroxylicious/proxy/filter/package-summary.html#implementing.threadSafety[`io.kroxylicious.proxy.filter`^] +package javadoc for more information on thread-safety. + +==== Filtering specific API Versions + +> Kafka has a "bidirectional" client compatibility policy. In other words, new clients can talk to old servers, and old clients can talk to new servers. This allows users to upgrade either clients or servers without experiencing any downtime. +> +> Since the Kafka protocol has changed over time, clients and servers need to agree on the schema of the message that they are sending over the wire. This is done through API versioning. +> +> Before each request is sent, the client sends the API key and the API version. These two 16-bit numbers, when taken together, uniquely identify the schema of the message to follow. +> -- https://kafka.apache.org/protocol.html#protocol_compatibility + +You may wish to restrict your Filter to only apply to specific versions of an API. For example, "intercept all FetchRequest +messages greater than api version 7". To do this you can override a method named `shouldHandleXyz[Request|Response]` on your filter like: + +[source,java] +---- +public class FetchFilter implements FetchRequestFilter { + + @Override + public boolean shouldHandleFetchRequest(short apiVersion) { + return apiVersion > 7; + } + + @Override + @Override + public CompletionStage onRequest(ApiKeys apiKey, + RequestHeaderData header, + ApiMessage body, + FilterContext filterContext) { + return context.forwardRequest(header, request); + } +} +---- + +=== Filter Construction and Configuration +For Kroxylicious to instantiate and configure your custom filter we use Java's {java-17-javadoc}/java.base/java/util/ServiceLoader.html[ServiceLoader^] API. +Each Custom Filter should provide a corresponding {api-javadoc}/io/kroxylicious/proxy/filter/FilterFactory.html[FilterFactory^] +implementation that can create an instance of your custom Filter. The factory can optionally declare a configuration class that Kroxylicious will +populate (using Jackson) when loading your custom Filter. The module must package a `META-INF/services/io.kroxylicious.proxy.filter.FilterFactory` +file containing the classnames of each filter factory implementation into the JAR file. + +For example in the kroxylicious-samples we have the {github}/blob/main/kroxylicious-sample/src/main/java/io/kroxylicious/sample/config/SampleFilterConfig.java[SampleFilterConfig] class. +This is used in the {github}/blob/main/kroxylicious-sample/src/main/java/io/kroxylicious/sample/SampleFetchResponseFilter.java[SampleFetchResponseFilter]). The configuration is routed to the Filter instance via the +{github}/blob/main/kroxylicious-sample/src/main/java/io/kroxylicious/sample/SampleFetchResponseFilterFactory.java[SampleFetchResponseFilterFactory]. + +Then, when we configure a filter in Kroxylicious configuration like: + +[source,yaml] +---- +filters: +- type: SampleFetchResponseFilterFactory + config: + findValue: a + replacementValue: b +---- +Kroxylicious will deserialize the `config` object into a `SampleFilterConfig` and use it to construct a +`SampleFetchResponseFilter` passing the `SampleFilterConfig` instance as a constructor argument. + +== Packaging filters + +Filters are packaged as standard `.jar` files. A typical Custom Filter jar contains: + +1. Filter implementation classes +2. A FilterFactory implementation per Filter and service metadata (see <>) \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/con-deploying.adoc b/docs/v0.7.0/_files/modules/con-deploying.adoc new file mode 100644 index 0000000..1a929b4 --- /dev/null +++ b/docs/v0.7.0/_files/modules/con-deploying.adoc @@ -0,0 +1,398 @@ +// file included in the following: +// +// index.adoc + +[id='con-deploying-{context}'] += Deploying proxies + +[role="_abstract"] +A deployment of Kroxylicious proxies requires configuration of virtual clusters representing Kafka clusters. +It is also recommended to enable TLS for secure connections. + +== Selecting plugins +Before configuring your virtual clusters, ensure that the necessary filter plugins are available. +Place the required filter JAR files in a designated directory accessible by Kroxylicious. + +//== Providing implementations for facades + +== Configuring virtual clusters + +As described earlier, the _Virtual Cluster_ is the downstream representation of a Kafka Cluster. +Kafka clients connect to the virtual cluster. + +You must define at least one virtual cluster. +One virtual cluster is required for each listener of a Kafka cluster. + +Let's look at how that is done by considering first a simple example. After we will look at more advanced options +including TLS. + +[source, yaml] +---- +virtualClusters: + demo: # <1> + targetCluster: + bootstrap_servers: myprivatecluster:9092 # <2> + clusterNetworkAddressConfigProvider: + type: PortPerBrokerClusterNetworkAddressConfigProvider # <3> + config: + bootstrapAddress: mypublickroxylicious:9192 # <4> +---- +<1> The name of the virtual cluster. +<2> The bootstrap of the (physical) Kafka Cluster. This is the Kafka Cluster being proxied. +<3> The name of a cluster network address config provider. The built-in types are `PortPerBrokerClusterNetworkAddressConfigProvider` and `SniRoutingClusterNetworkAddressConfigProvider`. +<4> The hostname and port of the bootstrap that will be used by the Kafka Clients. The hostname must be resolved +by the clients. + +This configuration declares a virtual cluster called `demo`. The physical Kafka Cluster being proxied is the defined +by the `targetCluster` element. In this example, the `PortPerBroker` scheme is used by Kroxylicious to present the +virtual cluster to the clients. Under this schema, Kroxylicious will open a port for each broker of the target cluster +with port numbers beginning at `9192` +1. So, if the target cluster has three brokers, Kroxylicious will bind 9192 for +bootstrap and 9193-9195 inclusive to allow the clients to connect to each broker. + +=== Cluster Network Address Config Providers + +The Cluster Network Address Config Provider controls how the virtual cluster is presented to the network. Two +alternatives are supported: `PortPerBroker` and `SniRouting` which have different characteristics which make each +suitable for different use-cases. They are described next. + +==== PortPerBroker scheme + +In the `PortPerBroker` scheme, Kroxylicious automatically opens a port for each virtual cluster's bootstrap and +one port per broker of each target cluster. So, if you have two virtual clusters, each targeting a Kafka Cluster +of three brokers, Kroxylicious will bind eight ports in total. + +`PortPerBroker` is designed to work best with simplistic configurations. It is preferable if the target cluster has +sequential, stable broker ids and a known minimum broker id (like 0,1,2 for a cluster of 3 brokers). It can work with +non-sequential broker ids, but you would have to expose `maxBrokerId - minBrokerId` ports, which could be a huge +number if your cluster included broker ids `0` and `20000`. + +Kroxylicious monitors the broker topology of the target cluster at runtime. It will adjust the number of open ports +dynamically. If another broker is added to the Kafka Cluster, Kroxylicious will automatically open a port for it. +Similarly, if a broker is removed from the Kafka Cluster, Kroxylicious will automatically close the port that was +assigned to it. + +The `PortPerBroker` scheme can be used with either clear text or TLS downstream connections. + +[source, yaml] +---- +clusterNetworkAddressConfigProvider: + type: PortPerBrokerClusterNetworkAddressConfigProvider + config: + bootstrapAddress: mycluster.kafka.com:9192 # <1> + brokerAddressPattern: mybroker-$(nodeId).mycluster.kafka.com # <2> + brokerStartPort: 9193 # <3> + numberOfBrokerPorts: 3 # <4> + lowestTargetBrokerId: 1000 # <5> + bindAddress: 192.168.0.1 # <6> +---- +<1> The hostname and port of the bootstrap that will be used by the Kafka Clients. +<2> (Optional) The broker address pattern used to form the broker addresses. If not defined, it defaults to the +hostname part of the `bootstrapAddress` and the port number allocated to the broker. +<3> (Optional) The starting number for broker port range. Defaults to the port of the `bootstrapAddress` plus 1. +<4> (Optional) The _maximum_ number of brokers of ports that are permitted. Defaults to 3. +<5> (Optional) The lowest broker id of the target cluster. Defaults to 0. This should be the lowest https://kafka.apache.org/documentation/#brokerconfigs_node.id[`node.id`] (or https://kafka.apache.org/documentation/#brokerconfigs_broker.id[`broker.id`]) defined in the target cluster. +<6> (Optional) The bind address used when binding the ports. If undefined, all network interfaces will be bound. + +The `brokerAddressPattern` configuration parameter understands the replacement token `$(nodeId)`. It is optional. +If present, it will be replaced by the https://kafka.apache.org/documentation/#brokerconfigs_node.id[`node.id`] (or +`broker.id`) assigned to the broker of the target cluster. + +For example if your configuration looks like the above and your cluster has three brokers, your Kafka Client will receive +broker address information like this: + +[source] +---- +0. mybroker-0.mycluster.kafka.com:9193 +1. mybroker-1.mycluster.kafka.com:9194 +2. mybroker-2.mycluster.kafka.com:9194 +---- + +NOTE: It is a responsibility for the deployer of Kroxylicious to ensure that the bootstrap and generated broker +DNS names are resolvable and routable by the Kafka Client. + +The `numberOfBrokerPorts` imposes a maximum on the number of brokers that a Kafka Cluster can have. Set this value +to be as high as the maximum number of brokers that your operational rules allow for a Kafka Cluster. + +Note that each broker's id must be greater than or equal to `lowestTargetBrokerId`, and less than `lowestTargetBrokerId + numberOfBrokerPorts`. +The current strategy for mapping node ids to ports is `nodeId -> brokerStartPort + nodeId - lowestTargetBrokerId`. So a +configuration like: + +[source, yaml] +---- +clusterNetworkAddressConfigProvider: + type: PortPerBrokerClusterNetworkAddressConfigProvider + config: + bootstrapAddress: mycluster.kafka.com:9192 + brokerStartPort: 9193 + numberOfBrokerPorts: 3 + lowestTargetBrokerId: 1000 +---- + +can only map broker ids 1000, 1001 and 1002 to ports 9193, 9194 and 9195 respectively. You would have to reconfigure +`numberOfBrokerPorts` to accommodate new brokers being added to the cluster. + +==== RangeAwarePortPerNode scheme + +The original PortPerBroker scheme has the limitation that we only control the lowest target brokerId and a maximum +number of brokers. We then expect all brokerIds to fall into the range [lowestBrokerId, lowestBrokerId + maxBrokerCount) +We must be able to map every possible broker id to a unique port so that in a cluster of Kroxylicious proxies all +members will deterministically map nodeId X to a port Y. Meaning that we may need to allocate many ports that are +not required if there are large gaps between nodeIds in the target cluster. + +The Range Aware Port Per Node schema introduces the idea of Node ID Ranges, allowing you to model what nodeId ranges exist in +the target cluster so that the proxy can expose a more compact number of ports but still retain this deterministic mapping +from nodeId to port. + +Aside from how it maps nodeIds to ports it behaves the same as Port-Per-Broker. + +[source, yaml] +---- +clusterNetworkAddressConfigProvider: + type: RangeAwarePortPerNodeClusterNetworkAddressConfigProvider + config: + bootstrapAddress: mycluster.kafka.com:9192 # <1> + brokerAddressPattern: mybroker-$(nodeId).mycluster.kafka.com # <2> + brokerStartPort: 9193 # <3> + nodeIdRanges: # <4> + - name: brokers # <5> + range: + startInclusive: 0 # <6> + endExclusive: 3 # <7> +---- +<1> The hostname and port of the bootstrap that will be used by the Kafka Clients. +<2> (Optional) The broker address pattern used to form the broker addresses. If not defined, it defaults to the +hostname part of the `bootstrapAddress` and the port number allocated to the broker. +<3> (Optional) The starting number for broker port range. Defaults to the port of the `bootstrapAddress` plus 1. +<4> The list of Node ID rangers, must be non-empty. +<5> Name of the range, must be unique within the nodeIdRanges list. +<6> Start of the range (inclusive) +<7> End of the range (exclusive). Must be greater than startInclusive, empty ranges are not allowed. + +NodeIdRanges must be distinct, a nodeId cannot be part of more than one range. + +The `brokerAddressPattern` configuration parameter understands the replacement token `$(nodeId)`. It is optional. +If present, it will be replaced by the https://kafka.apache.org/documentation/#brokerconfigs_node.id[`node.id`] (or +`broker.id`) assigned to the broker of the target cluster. + +For example: if I have a target cluster using KRaft that looks like: + +- nodeId: 0, roles: controller +- nodeId: 1, roles: controller +- nodeId: 2, roles: controller +- nodeId: 1000, roles: broker +- nodeId: 1001, roles: broker +- nodeId: 1002, roles: broker +- nodeId: 99999, roles: broker + +Then we can model this as three Node Id Ranges: + +[source, yaml] +---- + clusterNetworkAddressConfigProvider: + type: RangeAwarePortPerNodeClusterNetworkAddressConfigProvider + config: + bootstrapAddress: mycluster.kafka.com:9192 + nodeIdRanges: + - name: controller + range: + startInclusive: 0 + endExclusive: 3 + - name: brokers + range: + startInclusive: 1000 + endExclusive: 1003 + - name: broker-outlier + range: + startInclusive: 99999 + endExclusive: 100000 +---- + +Which will result in this mapping from nodeId to Port + +- nodeId: 0 -> port 9193 +- nodeId: 1 -> port 9194 +- nodeId: 2 -> port 9195 +- nodeId: 1000 -> port 9196 +- nodeId: 1001 -> port 9197 +- nodeId: 1002 -> port 9198 +- nodeId: 99999 -> port 9199 + +==== SniRouting scheme + +In the `SniRouting` scheme, Kroxylicious uses SNI information to route traffic to either the boostrap or individual +brokers. As this relies on SNI (Server Name Indication), the use of <> is *required*. + +With this scheme, you have the choice to share a single port across all virtual clusters, or you can assign a different +port to each. Single port operation may have cost advantages when using load balancers of public clouds, as it allows +a single cloud provider load balancer to be shared across all virtual clusters. + +[source, yaml] +---- +clusterNetworkAddressConfigProvider: + type: SniRoutingClusterNetworkAddressConfigProvider + config: + bootstrapAddress: mycluster.kafka.com:9192 # <1> + brokerAddressPattern: mybroker-$(nodeId).mycluster.kafka.com # <2> + bindAddress: 192.168.0.1 # <3> +---- +<1> The hostname and port of the bootstrap that will be used by the Kafka Clients. +<2> The broker address pattern used to form the broker addresses. The `$(nodeId)` token must be present. +<3> (Optional) The bind address used when binding the port. If undefined, all network interfaces will be bound. + +The `brokerAddressPattern` configuration parameters requires that the `$(nodeId)` token is present within it. +This is replaced by the https://kafka.apache.org/documentation/#brokerconfigs_node.id[`node.id`] (or `broker.id) +assigned to the broker of the target cluster. This allows Kroxylicious to generate separate routes for each broker. + +NOTE: It is a responsibility for the deployer of Kroxylicious to ensure that the bootstrap and generated broker +DNS names are resolvable and routable by the Kafka Client. + +=== Transport Layer Security (TLS) + +In this section we look at how to enable TLS for either the downstream and/or upstream. Note, there is no +interdependency; it is supported to have TLS configured for the downstream and use clear text communications for the +upstream, or vice-versa. + +NOTE: TLS is recommended for both upstream and downstream for production configurations. + +==== Downstream TLS + +Here's how to enable TLS for the downstream side. This means the Kafka Client will connect to the virtual cluster over +TLS rather than clear text. For this, you will need to obtain a TLS certificate for the virtual cluster from your +Certificate Authority. + +NOTE: When requesting the certificate ensure that the certificate will match the names of the virtual cluster's +bootstrap and broker addresses. This may mean making use of wildcard certificates and/or Subject Alternative Names (SANs). + +Kroxylicious accepts key material in PKCS12 or JKS keystore format, or PEM formatted file(s). The following configuration +illustrates configuration with PKCS12 keystore. + +[source, yaml] +---- +virtualClusters: + demo: + tls: + key: + storeFile: /opt/cert/server.p12 # <1> + storePassword: + passwordFile: /opt/cert/store.password # <2> + keyPassword: + passwordFile: /opt/cert/key.password # <3> + storeType: PKCS12 # <4> + clusterNetworkAddressConfigProvider: + ... +---- +<1> File system location of a keystore (or in the case of `PEM` format a text file containing the concatenation of the +private key, certificate, and intermediates). +<2> File system location of a file containing the key store's password. +<3> (Optional) File system location of a file containing the key's password. If omitted the key store's password is +used to decrypt the key too. +<4> (Optional) Store type. Supported types are: `PKCS12`, `JKS` and `PEM`. Defaults to Java default key store type +which is usually `PKCS12`. + +Alternatively, if your key material is in separate PEM files (private key, and certificate/intermediates), the following +configuration may be used: + +[source, yaml] +---- +virtualClusters: + demo: + tls: + key: + privateKeyFile: /opt/cert/server.key # <1> + certificateFile: /opt/cert/server.crt # <2> + keyPassword: + passwordFile: /opt/cert/key.password # <3> + clusterNetworkAddressConfigProvider: + ... +---- +<1> File system location of the server private key. +<2> File system location of the server certificate and intermediate(s). +<3> (Optional) File system location of a file containing the key's password. + +NOTE: For the private-key, https://datatracker.ietf.org/doc/html/rfc5208[PKCS-8 keys] are supported by default. +For https://datatracker.ietf.org/doc/html/rfc8017[PKCS-1 keys], https://www.bouncycastle.org/[Bouncycastle] libraries +must be added to the Kroxylicious classpath.See https://github.com/netty/netty/issues/7323 for more details. + +[id='con-deploying-upstream-tls-{context}'] +==== Upstream TLS + +Here's show to enable TLS for the upstream side. +This means that Kroxylicious connects to the (physical) Kafka Cluster over TLS. +For this, your Kafka Cluster must have already been configured to use TLS. + +By default, Kroxylicious inherits what it trusts from the platform it is running on and uses this to determine whether +the Kafka Cluster is trusted or not. + +To support cases where trust must be overridden (such as use-cases involving the use of private CAs or self-signed +certificates), Kroxylicious accepts override trust material in PKCS12 or JKS keystore format, or PEM formatted +certificates. + +The following illustrates enabling TLS, inheriting platform trust: + +[source, yaml] +---- +virtualClusters: + demo: + targetCluster: + bootstrap_servers: myprivatecluster:9092 + tls: {} <1> + #... +---- +<1> Use an empty object to enable TLS inheriting trust from the platform. + +The following illustrates enabling TLS but with trust coming from a PKCS12 trust store instead of the platform: + +[source, yaml] +---- +virtualClusters: + demo: + targetCluster: + bootstrap_servers: myprivatecluster:9092 + tls: + trust: + storeFile: /opt/cert/trust.p12 # <1> + storePassword: + passwordFile: /opt/cert/store.password # <2> + storeType: PKCS12 # <3> + #... +---- +<1> File system location of a truststore (or in the case of `PEM` format a text file containing the certificates). +<2> File system location of a file containing the trust store's password. +<3> (Optional) Trust store type. Supported types are: `PKCS12`, `JKS` and `PEM`. Defaults to Java default key store type (PKCS12). + +The following illustrates connection to physical cluster using TLS client authentication (aka Mutual TLS). + +[source, yaml] +---- +virtualClusters: + demo: + targetCluster: + bootstrap_servers: myprivatecluster:9092 + tls: + key: + privateKeyFile: /opt/cert/client.key + certificateFile: /opt/cert/client.cert + trust: + storeFile: /opt/cert/client/server.cer + storeType: PEM +---- +It is also possible to disable trust so that Kroxylicious will connect to any Kafka Cluster regardless of its certificate +validity. + +WARNING: This option is not recommended for production use. + +[source, yaml] +---- +virtualClusters: + demo: + targetCluster: + bootstrap_servers: myprivatecluster:9092 + tls: + trust: + insecure: true # <1> + #... +---- +<1> Enables insecure TLS. + +YAML +Proxy level configuration diff --git a/docs/v0.7.0/_files/modules/con-operating.adoc b/docs/v0.7.0/_files/modules/con-operating.adoc new file mode 100644 index 0000000..70d0c6e --- /dev/null +++ b/docs/v0.7.0/_files/modules/con-operating.adoc @@ -0,0 +1,174 @@ +// file included in the following: +// +// index.adoc + +[id='con-operating-{context}'] += Operating proxies + +[role="_abstract"] +When operating proxies, introduce custom logging configurations with `log4j2` and set root log levels. +Set up an admin HTTP endpoint for Prometheus metrics scraping and integrate Micrometer for enhanced observability. +Configure common tags and standard binders for JVM and system metrics to ensure comprehensive monitoring and efficient proxy operation. + +== Logging + +The Kroxylicious {github-releases}[binary distribution^] bundles https://logging.apache.org/log4j/2.x[log4j2] as a logging backend. To customize the logging configuration users can: + +=== Use an Alternative Log4j configuration file + +When using {start-script}[bin/kroxylicious-start.sh^] from the binary distribution, you can optionally set an environment variable: + +[source,shell] +---- +KROXYLICIOUS_LOGGING_OPTIONS="-Dlog4j2.configurationFile=/path/to/custom/log4j2.yaml" +---- + +to load an alternative log4j2 configuration. + +=== Change the Root Log level + +When using {start-script}[bin/kroxylicious-start.sh^] from the binary distribution and using the default logging configuration file, you can set an environment variable: + +[source,shell] +---- +KROXYLICIOUS_ROOT_LOG_LEVEL="DEBUG" +---- + +to configure the root log level (note this will be very verbose at DEBUG/TRACE) + +== Monitoring and observability + +Kroxylicious uses micrometer as a facade for gathering metrics. A Prometheus backend is the only supported implementation so far. + +=== Admin HTTP Endpoint + +Kroxylicious offers a configurable, insecure, HTTP endpoint for administration purposes. It can +offer: + +- Prometheus scrape endpoint at `/metrics` + +#### minimal configuration example + +[source,yaml] +---- +adminHttp: + endpoints: + prometheus: {} +---- +Defaults to binding to `0.0.0.0:9190`. If no endpoints are configured the admin http server +is not created. + +#### complete configuration example + +[source,yaml] +---- +adminHttp: + host: localhost # <1> + port: 9999 # <2> + endpoints: + prometheus: {} # <3> +---- + +<1> Bind address for the server specified using either a hostname or interface address. If omitted, it will bind to all interfaces + (i.e. `0.0.0.0`). +<2> Port number to be bound. If omitted port `9190` will be bound. +<3> Enables the prometheus endpoint. + +=== Micrometer Metrics + +Kroxylicious integrates with https://micrometer.io/docs[micrometer]. + +> Micrometer provides a simple facade over the instrumentation clients for the most popular observability systems, allowing you to instrument your JVM-based application code without vendor lock-in. + +==== complete configuration example + +[source,yaml] +---- +adminHttp: + endpoints: + prometheus: {} +micrometer: + - type: "CommonTagsHook" + config: + commonTags: + zone: "euc-1a" # <1> + - type: "StandardBindersHook" + config: + binderNames: + - "JvmGcMetrics" # <2> +---- +This configuration: + +<1> configures a common tag on the global micrometer registry of `zone: euc-1a` to add to all metrics (becomes a label in prometheus) +<2> registers a JvmGcMetrics binder with the global registry (shipped with micrometer) + +Prometheus is connected to the micrometer global registry so Filters can record metrics against +it, and they will be available as part of the Prometheus scrape data. + +If you executed `curl localhost:9999/metrics` you should see metrics like: + +---- +jvm_gc_memory_allocated_bytes_total{zone="euc-1a",} 0.0 +---- + +==== Common Tags + +We can add common tags that will be added to all metrics. These will be available as labels +from the Prometheus scrape. + +To configure common tags use configuration: + +[source,yaml] +---- + - type: "CommonTagsHook" + config: + commonTags: + zone: "euc-1a" + owner: "becky" +---- + +==== Standard Binders + +Micrometer has a concept of MeterBinder: + +> Binders register one or more metrics to provide information about the state of some aspect of the application or its container. + +By registering some standard binders shipped with micrometer you can expose metrics +about the JVM and system which can observe JVM memory usage, garbage collection +and other behaviour. + +To configure multiple binders you can use configuration like: + +[source, yaml] +---- +micrometer: + - type: "StandardBindersHook" + config: + binderNames: + - "JvmGcMetrics" + - "JvmHeapPressureMetrics" +---- + +And those named binders will be bound to the global meter registry + +.Available Binders +|=== +|name |micrometer class +|ClassLoaderMetrics| io.micrometer.core.instrument.binder.jvm.ClassLoaderMetrics +|JvmCompilationMetrics|io.micrometer.core.instrument.binder.jvm.JvmCompilationMetrics +|JvmGcMetrics|io.micrometer.core.instrument.binder.jvm.JvmGcMetrics +|JvmHeapPressureMetrics|io.micrometer.core.instrument.binder.jvm.JvmHeapPressureMetrics +|JvmInfoMetrics|io.micrometer.core.instrument.binder.jvm.JvmInfoMetrics +|JvmMemoryMetrics|io.micrometer.core.instrument.binder.jvm.JvmMemoryMetrics +|JvmThreadMetrics|io.micrometer.core.instrument.binder.jvm.JvmThreadMetrics +|FileDescriptorMetrics|io.micrometer.core.instrument.binder.system.FileDescriptorMetrics +|ProcessorMetrics|io.micrometer.core.instrument.binder.system.ProcessorMetrics +|UptimeMetrics|io.micrometer.core.instrument.binder.system.UptimeMetrics +|=== + +#### Micrometer Usage from Filters + +Filters can use the static methods of https://www.javadoc.io/doc/io.micrometer/micrometer-core/1.10.5/io/micrometer/core/instrument/Metrics.html[Metrics] +to register metrics with the global registry. Or use `Metrics.globalRegistry` to +get a reference to the global registry. Metrics registered this way will be +automatically available through the prometheus scrape endpoint. diff --git a/docs/v0.7.0/_files/modules/con-proxy-overview.adoc b/docs/v0.7.0/_files/modules/con-proxy-overview.adoc new file mode 100644 index 0000000..0603a49 --- /dev/null +++ b/docs/v0.7.0/_files/modules/con-proxy-overview.adoc @@ -0,0 +1,253 @@ +// Module included in the following: +// +// assembly-overview.adoc + +[id='con-proxy-overview-{context}'] += Why use proxies? + +Proxies are a powerful and flexible architectural pattern. +For Kafka, they can be used to add functionality to Kafka clusters which is not available out-of-the-box with Apache Kafka. +In an ideal world, such functionality would be implemented directly in Apache Kafka. +But there are numerous practical reasons that can prevent this, for example: + +* Organizations having very niche requirements which are unsuitable for implementation directly in Apache Kafka. +* Functionality which requires changes to Kafka's public API and which the Apache Kafka project is unwilling to implement. + This is the case for https://lists.apache.org/thread/x1p119hkpoy01vq9ck3d0ql67jtvm875[broker interceptors], for example. +* Experimental functionality which might end up being implemented in Apache Kafka eventually. +For example using Kroxylicious it's easier to experiment with alternative transport protocols, such as Quic, or operating system APIs, such as io_uring, because there is already support for this in Netty, the networking framework on which Kroxylicious is built. + +== How Kroxylicious works + +First let's define the concepts in the landscape surrounding Kroxylicious. + +. _Kafka Client_, or _Client_ refers to any client application using a Kafka Client library to talk to a *Kafka Cluster*. +. _Kafka Cluster_ or _Cluster_ refers to a cluster comprising one or more Kafka Brokers. +. _Downstream_ refers to the area between Kafka Client and Kroxylicious. +. _Upstream_ refers to the area between Kroxylicious and a Kafka Cluster. + +.Kroxylicious landscape +[a2s, format="svg"] +.... +.---------------------------------------------------------------------------. +| : | +| : +===============+ | +| : :[Cluster] : | +| .-----------------. : .----------. : | +| .----------. |[Kroxylicious] | : |[Broker1] | : | +| |[Client] | | +------->| | : | +| | +------->| | : | | : | +| | | | | : .----------. : | +| | | | | : : | +| .----------. | | : .----------. : | +| | | : |[Broker2] | : | +| | +------->| | : | +| | | : | | : | +| | | : .----------. : | +| .----------. | | : : | +| |[Client] | | | : .----------. : | +| | | | | : |[Broker3] | : | +| | +------->| +------->| | : | +| | | | | : | | : | +| .----------. | | : .----------. : | +| .-----------------. .===============+ | +| : | +| : | +| [downstream] : [upstream] | +.---------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +Now let's define some concepts used within Kroxylicious itself. + +=== Virtual cluster + +The _Virtual Cluster_ is the downstream representation of a Kafka Cluster. At the conceptual level, a Kafka Client +connects to a Virtual Cluster. Kroxylicious proxies all communications made to the Virtual Cluster through to a +(physical) Kafka Cluster, passing it through the _Filter Chain_. + +So far, this explanation has elided the detail of Kafka Brokers. Let's talk about that now. + +The Virtual Cluster automatically exposes a bootstrap endpoint for the Virtual Cluster. This is what the Kafka Client +must specify as the https://kafka.apache.org/documentation/#producerconfigs_bootstrap.servers[bootstrap.servers] property +in the client configuration. + +In addition to the bootstrap endpoint, Kroxylicious automatically exposes broker endpoints. There is one broker endpoint +for each broker of the physical cluster. When the Client connects to a broker endpoint, Kroxylicious proxies all +communications to the corresponding broker of the (physical) Kafka Cluster. + +Kroxylicious automatically intercepts all the Kafka RPC responses that contain a broker address. It rewrites the address +so that it refers to the corresponding broker endpoint of the Virtual Cluster. This means when the Kafka Client +goes to connect to, say broker 0, it does so through the Virtual Cluster. + +=== Target cluster + +The _Target Cluster_ is the definition of physical Kafka Cluster within the Kroxylicious itself. + +A _Virtual Cluster_ has exactly one _Target Cluster_. + +There can be a _one-to-one_ relationship between Virtual Clusters and Target Clusters. +The other possibility is _many-to-one_, where many Virtual Clusters point to the same Target Cluster. The +many-to-one pattern is exploited by filters such as the xref:available-filters.adoc#_multi_tenancy[Multi-tenancy] +filter. + +.One-to-One relationship between Virtual Cluster and Target Cluster +[a2s, format="svg"] +.... +.-------------------------------------------------------------------------------------. +| | +| '-------------------------------------' '====================' | +| '---------' |[Kroxylicious] | :[Cluster] : | +| |[Client] |=+ | | : : | +| '---------' : | '----------------------------' | : '---------' : | +| : | |[Virtual Cluster] |===+====|=====>|[Broker] | : | +| +===|===>| | | : | | : | +| : | | | | : '---------' : | +| +===|===>| | | : : | +| | | | | : '---------' : | +| | | |===+====:==+==>|[Broker] | : | +| | '----------------------------' | : | | : | +| | | : '---------' : | +| '-------------------------------------' '====================' | +.-------------------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +.Many-to-one between Virtual Cluster and Target Cluster +[a2s, format="svg"] +.... +.-------------------------------------------------------------------------------------. +| | +| '-------------------------------------' '====================' | +| '---------' |[Kroxylicious] | :[Cluster] : | +| |[Client] |=+ | | : : | +| '---------' : | '----------------------------' | : '---------' : | +| : | |[Virtual Cluster A] |===+=+==|==+==>|[Broker] | : | +| +===|===>| | | : : | | : | +| : | | | | : : '---------' : | +| +===|===>| | | : : : | +| | | | | : : '---------' : | +| | | |===+====:==+==>|[Broker] | : | +| | '----------------------------' | : : : | | : | +| '---------' | | : : : '---------' : | +| |[Client] |=+ | '----------------------------' | : : : : | +| '---------' : | |[Virtual Cluster B] |===+=+ '====================' | +| +===|===>| | | : | +| : | | | | : | +| +===|===>| | | : | +| | | | | : | +| | | |===+=======+ | +| | '----------------------------' | | +| '-------------------------------------' | +.-------------------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +A one-to-many pattern, where one Virtual Cluster points to many Target Clusters (providing amalgamation), +is not a supported use-case. + +=== Filter chain + +A _Filter Chain_ consists of an *ordered list* of pluggable _protocol filters_. + +A _protocol filter_ implements some logic for intercepting, inspecting and/or manipulating Kafka protocol messages. +Kafka protocol requests (such as `Produce` requests) pass sequentially through each of the protocol filters in the +chain, beginning with the 1st filter in the chain and then following with the subsequent filters, before being +forwarded to the broker. + +When the broker returns a response (such as a `Produce` response) the protocol filters in the chain are invoked in the +reverse order (that is, beginning with the nth filter in the chain, then the n-1th and so on) with each having the +opportunity to inspect and/or manipulating the response. Eventually a response is returned to the client. + +The description above describes only the basic capabilities of the protocol filter. Richer features of filters +are described later. + +// TODO document additional filter behaviours https://github.com/kroxylicious/kroxylicious/issues/420 + +.Illustration of a request and response being manipulated by filters in a chain +[a2s, format="svg"] +.... +.----------------------------------------------------------------------------------------------------------------------. +| | +| '---------------------------------------------------------------' | +| |[Kroxylicious] | | +| | | | +| | '----------------------------------------------------' | '--------------------' | +| | |[Virtual Cluster] | | |[Cluster] | | +| '-------------' | | '----------' '----------' '----------' | | | '------------' | | +| |[Client] | | | |[Filter1] | |[Filter2] | |[Filter3] | | | | |[Broker] | | | +| | |======|===|==>| |====>| |====>| |===|======|======|===>| | | | +| | | A | | | F(A)-->B | B | F(B)-->C | C | F(C)-->D | | | | D | | | | +| | | | | | | | | | | | | | | | | | +| | |<=====|===|===| |<====| |<====| |<==|======|======|====| | | | +| | | W | | | f(X)-->W | X | f(Y)-->X | Y | f(Z)-->Y | | | | Z | | | | +| '-------------' | | '----------' '----------' '----------' | | | '------------' | | +| | | | | '--------------------' | +| | '----------------------------------------------------' | | +| | | | +| '---------------------------------------------------------------' | +| | +.----------------------------------------------------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +As mentioned above, Kroxylicious takes the responsibility to rewrite the Kafka RPC responses that carry broker address +information so that they reflect the broker addresses exposed by the Virtual Cluster. These are the +https://kafka.apache.org/protocol.html#The_Messages_Metadata[`Metadata`], +https://kafka.apache.org/protocol.html#The_Messages_DescribeCluster[`DescribeCluster`] and +https://kafka.apache.org/protocol.html#The_Messages_FindCoordinator[`FindCoordinator`] responses. This processing is +entirely transparent to the work of the protocol filters. _Filter authors_ are free to write their own filters that +intercept these responses too. + +=== Filter composition + +An important principal for the protocol filter API is that filters should _compose_ nicely. +That means that filters generally don't know what other filters might be present in the chain, and what they might be doing to messages. +When a filter forwards a request or response it doesn't know whether the message is being sent to the next filter in the chain, or straight back to the client. + +Such composition is important because it means a _proxy user_ can configure multiple filters (possibly written by several _filter authors_) and expect to get the combined effect of all of them. + +It's never quite that simple, of course. +In practice they will often need to understand what each filter does in some detail in order to be able to operate their proxy properly, for example by understanding whatever metrics each filter is emitting. + +== Implementation + +The proxy is written in Java, on top of https://netty.io[Netty]. +The usual https://netty.io/4.1/api/io/netty/channel/ChannelHandler.html[`ChannelHandlers`] provided by the Netty project are used where appropriate (e.g. SSL support uses https://netty.io/4.1/api/io/netty/handler/ssl/SslHandler.html[`SslHandler`]), and Kroxylicious provides Kafka-specific handlers of its own. + +The Kafka-aware parts use the Apache Kafka project's own classes for serialization and deserialization. + +Protocol filters get executed using a handler-per-filter model. + +== Deployment topologies + +The proxy supports a range of possible deployment topologies. +Which style is used depends on what the proxy is meant to _achieve_, architecturally speaking. +Broadly speaking a proxy instance can be deployed: + +As a forward proxy:: +Proxying the access of one or more clients to a particular cluster/broker that might also accessible (to other clients) directly. ++ +// TODO include a diagram ++ +Topic-level encryption provides one example use case for a forward proxy-style deployment. +This might be applicable when using clients that don't support interceptors, or if an organization wants to apply the same encryption policy in a single place, securing access to the keys within their network. + +As a reverse proxy:: +Proxying access for all clients trying to reach a particular cluster/broker. ++ +// TODO include a diagram ++ +Transparent multi-tenancy provides an example use case for a reverse proxy. +While Apache Kafka itself has some features that enable multi-tenancy, they rely on topic name prefixing as the primary mechanism for ensuring namespace isolation. +Tenants have to adhere to the naming policy and know they're a tenant of a larger shared cluster. ++ +_Transparent_ multi-tenancy means each tenant has the illusion of having their own cluster, with almost complete freedom over topic and group naming, while still actually sharing a cluster. + +// TODO we probably don't need the level of detail below, just summarize +// and provide the detail in the deploying section + +We can further classify deployment topologies in how many proxy instances are used. +For example: + +* Single proxy instance (sidecar) +* Proxy pool \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/multi-tenancy/proc-multi-tenancy.adoc b/docs/v0.7.0/_files/modules/multi-tenancy/proc-multi-tenancy.adoc new file mode 100644 index 0000000..3b561b1 --- /dev/null +++ b/docs/v0.7.0/_files/modules/multi-tenancy/proc-multi-tenancy.adoc @@ -0,0 +1,44 @@ +// file included in the following: +// +// assembly-multi-tenancy-filter.adoc + +[id='proc-multi-tenancy-{context}'] += (Preview) Setting up the Multi-tenancy filter + +[role="_abstract"] +This procedure describes how to set up the Multi-tenancy filter by configuring it in Kroxylicious. +The filter dynamically prefixes resource names to create isolation between tenants using the same Kafka cluster. +The prefix representing a tenant is taken from the name of the virtual cluster representing the tenant. +For example, if the virtual cluster is named `tenant-1`, the prefix is `tenant-1`. +Each tenant must be represented by a unique virtual cluster, and virtual cluster names must be globally unique within the Kroxylicious configuration. +This means that the same virtual cluster name cannot be used to represent different Kafka clusters. + +.Prerequisites + +* An instance of Kroxylicious. +For information on deploying Kroxylicious, see the link:{github}[samples and examples^]. +* A config map for Kroxylicious that includes the configuration for creating virtual clusters and filters. +* A virtual cluster definition for each tenant using the Kafka cluster. +You need at least two virtual clusters to apply multi-tenancy. + +.Procedure + +. Configure a `MultiTenantTransformationFilterFactory` type filter. ++ +[source, yaml] +---- +filters: + - type: MultiTenantTransformationFilterFactory + config: + prefixResourceNameSeparator: "." # <1> +---- +<1> The separator used for the prefix. +If a separator is not specified, `-` is the default. ++ +NOTE: Currently, only the prefix with separator is validated. + +. Verify that multi-tenancy filtering has been applied. ++ +For example, create a topic through each virtual cluster and check that the topics are prefixed with the name of the corresponding virtual cluster. ++ +For more information, see the {github}/blob/main/kubernetes-examples/filters/multi-tenant/script.txt[example for a Kubernetes environment]. \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/oauthbearer/con-oauthbearer.adoc b/docs/v0.7.0/_files/modules/oauthbearer/con-oauthbearer.adoc new file mode 100644 index 0000000..485f7ab --- /dev/null +++ b/docs/v0.7.0/_files/modules/oauthbearer/con-oauthbearer.adoc @@ -0,0 +1,105 @@ +// file included in the following: +// +// assembly-built-in-filters.adoc + +[id='con-oauthbearer-{context}'] += OAUTHBEARER validation + +[role="_abstract"] +OauthBearerValidation filter enables a validation on the JWT token received from client before forwarding it to cluster. + +If the token is not validated, then the request is short-circuited. +It reduces resource consumption on the cluster when a client sends too many invalid SASL requests. + +[a2s, format="svg"] +.... +.----------------------------------------------------------------------------------------. +| | +| '---------' '---------------' '----------' | +| | | | | | | | +| | Client | | Kroxylicious | | Cluster | | +| | | | | | | | +| '---------' '---------------' '----------' | +| | | | | +| | Handshake request | | | +| |==============================>| Forward handshake request | | +| | |===============================>| | +| | | Handshake response | | +| | Handshake response |<===============================| | +| |<==============================| | | +| | | | | +| | | | | +| | Authenticate request | | | +| |==============================>| | | +| | |==+ | | +| | | : Validate Token | | +| | |<=+ | | +| | | | | +| '=======|===============================|==' | | +| :if | [Validation fails] | : | | +| : | | : | | +| : | Invalid token | : | | +| : |<==============================| : | | +| : | | : | | +| '=======|===============================|==' | | +| | | | | +| '=======|===============================|================================|==' | +| :if | [Validation succeeds] | | : | +| : | | Forward authenticate request | : | +| : | |===============================>| : | +| : | | Authenticate response | : | +| : | |<===============================| : | +| : | Authenticate response | | : | +| : |<==============================| | : | +| : | | | : | +| '=======|===============================|================================|==' | +| | | | | +| | | | | +| '---------' '---------------' '----------' | +| | | | | | | | +| | Client | | Kroxylicious | | Cluster | | +| | | | | | | | +| '---------' '---------------' '----------' | +| | +.----------------------------------------------------------------------------------------. +[0,0]: {"fill":"#99d","a2s:delref":1} +.... + +== How to use the filter + +There are two steps to using the filter. + +1. <> +2. Configuring the filter within Kroxylicious. + +=== Configuring the filter within Kroxylicious. + +[source, yaml] +---- +filters: + - type: OauthBearerValidation + config: + jwksEndpointUrl: https://oauth/JWKS #<1> + jwksEndpointRefreshMs: 3600000 #<2> + jwksEndpointRetryBackoffMs: 100 #<3> + jwksEndpointRetryBackoffMaxMs: 10000 #<4> + scopeClaimName: scope #<5> + subClaimName: sub #<6> + authenticateBackOffMaxMs: 60000 #<7> + authenticateCacheMaxSize: 1000 #<8> + expectedAudience: https://first.audience, https//second.audience #<9> + expectedIssuer: https://your-domain.auth/ #<10> +---- + +<1> The OAuth/OIDC provider URL from which the provider's JWKS (JSON Web Key Set) can be retrieved. +<2> The (optional) value in milliseconds for the broker to wait between refreshing its JWKS (JSON Web Key Set) cache that contains the keys to verify the signature of the JWT. +<3> The (optional) value in milliseconds for the initial wait between JWKS (JSON Web Key Set) retrieval attempts from the external authentication provider. +<4> The (optional) value in milliseconds for the maximum wait between attempts to retrieve the JWKS (JSON Web Key Set) from the external authentication provider. +<5> This (optional) setting can provide a different name to use for the scope included in the JWT payload's claims. +<6> This (optional) setting can provide a different name to use for the subject included in the JWT payload's claims. +<7> The (optional) maximum value in milliseconds to limit the client sending authenticate request. Setting 0 will never limit the client. Otherwise, an exponential delay is added to each authenticate request until the authenticateBackOffMaxMs has been reached. +<8> The (optional) maximum number of failed tokens kept in cache. +<9> The (optional) comma-delimited setting for the broker to use to verify that the JWT was issued for one of the expected audiences. +<10> The (optional) setting for the broker to use to verify that the JWT was created by the expected issuer. + +Note: OauthBearer config follows https://kafka.apache.org/documentation/#security_ssl[kafka's properties] \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-key-creation.adoc b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-key-creation.adoc new file mode 100644 index 0000000..901adc2 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-key-creation.adoc @@ -0,0 +1,31 @@ +// file included in the following: +// +// assembly-aws-kms.adoc + +[id='con-aws-kms-key-creation-{context}'] += Creating AWS KMS keys + +As the administrator, use either the AWS Console or CLI to +{aws}/kms/latest/developerguide/create-keys.html#create-symmetric-cmk[create] a *Symmetric key* with *Encrypt and decrypt* +usage. Multi-region keys are supported. It is not possible to make use of keys from other AWS accountsfootnote:[https://github.com/kroxylicious/kroxylicious/issues/1217]. + +Give the key an alias as described in xref:con-aws-kms-setup-{context}[]. + +If using the CLI, this can be done with commands like this: + +[source,shell] +---- +KEY_ALIAS="KEK_" +KEY_ID=$(aws kms create-key | jq -r '.KeyMetadata.KeyId') +# the create key command will produce JSON output including the KeyId +aws kms create-alias --alias-name alias/${KEY_ALIAS} --target-key-id ${KEY_ID} +---- + +Once the key is created, it is recommended to use a key rotation policy. + +[source,shell] +---- +aws kms enable-key-rotation --key-id ${KEY_ID} --rotation-period-in-days 180 +---- + + diff --git a/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-service-config.adoc b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-service-config.adoc new file mode 100644 index 0000000..16916e8 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-service-config.adoc @@ -0,0 +1,30 @@ +// file included in the following: +// +// assembly-aws-kms.adoc + +[id='con-aws-kms-service-config-{context}'] += Configuring the AWS KMS + +For AWS KMS, the KMS configuration looks like this. + +[source, yaml] +---- +kms: AwsKmsService # <1> +kmsConfig: + endpointUrl: https://kms..amazonaws.com # <2> + tls: # <3> + accessKey: + passwordFile: /opt/aws/accessKey # <4> + secretKey: + passwordFile: /opt/aws/secretKey # <5> + region: # <6> +---- +<1> Name of the KMS provider. This must be `AwsKmsService`. +<2> AWS Endpoint URL. This must include the `https://` scheme part. +<3> (Optional) TLS trust configuration. +<4> File containing the AWS Access Key +<5> File containing the AWS Secret Key +<6> AWS region identifier (e.g. `us-east-1`) + +For TLS trust and TLS client authentication configuration, the filter accepts the same TLS parameters as xref:con-deploying-upstream-tls-{context}[Upstream TLS] +except the `PEM` store type is currently https://github.com/kroxylicious/kroxylicious/issues/933[not supported]. diff --git a/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-setup.adoc b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-setup.adoc new file mode 100644 index 0000000..a84762e --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/aws-kms/con-aws-kms-setup.adoc @@ -0,0 +1,102 @@ +// file included in the following: +// +// assembly-aws-kms.adoc + +[id='con-aws-kms-setup-{context}'] += Establish an aliasing convention for keys within AWS KMS + +The filter references KEKs within AWS via an {aws}/kms/latest/developerguide/alias-about.html[AWS key alias]. + +Establish a naming convention for key aliases to keep the filter’s keys separate from those used by other systems. +Here, we use a prefix of KEK_ for filter aliases. +Adjust the instructions if a different naming convention is used. + +== Role of the administrator + +To use the filter, an administrator (or administrative process) must create the encryption keys within AWS KMS that will be used to encrypt the records. +The organization deploying the Record Encryption filter is responsible for managing this administrator or process. + +The administrator must have permissions to create keys in AWS KMS. +As a starting point, the built-in AWS policy `AWSKeyManagementServicePowerUser` confers sufficient key management privileges. + +To get started, use the following commands to set up an administrator with permissions suitable for managing encryption keys in KMS through an AWS Cloud Shell. +This example illustrates using the user name`kroxylicious-admin`, but you can choose a different name if preferred. +Adjust the instructions accordingly if you use a different user name. + +[source,shell] +---- +ADMIN=kroxylicious-admin +INITIAL_PASSWORD=$(tr -dc 'A-Za-z0-9!?%=' < /dev/urandom | head -c 10) +CONSOLE_URL=https://$(aws sts get-caller-identity --query Account --output text).signin.aws.amazon.com/console +aws iam create-user --user-name ${ADMIN} +aws iam attach-user-policy --user-name ${ADMIN} --policy-arn arn:aws:iam::aws:policy/AWSKeyManagementServicePowerUser +aws iam attach-user-policy --user-name ${ADMIN} --policy-arn arn:aws:iam::aws:policy/IAMUserChangePassword +aws iam attach-user-policy --user-name ${ADMIN} --policy-arn arn:aws:iam::aws:policy/AWSCloudShellFullAccess +aws iam create-login-profile --user-name ${ADMIN} --password ${INITIAL_PASSWORD} --password-reset-required +echo Now log in at ${CONSOLE_URL} with user name ${ADMIN} password ${INITIAL_PASSWORD} and change the password. +---- + +== Establish an application identity for the filter + +The filter must authenticate to AWS in order to perform envelope encryption operations, such as generating and decrypting DEKs. +Therefore, an AWS IAM identity with sufficient permissions must be created for the filter. + +Use AWS IAM to create this identity and generate an *Access Key* for it. +The Access Key/Secret Key pair is used by the filter. +Do not enable console access for this user. + +Using the CLI, the following commands create the IAM identity for the filter. +This example uses the user name `kroxylicious`, but you can choose a different name if needed. +Adjust the instructions accordingly if using a different user name. + +[source,shell] +---- +aws iam create-user --user-name kroxylicious +aws iam create-access-key --user-name kroxylicious +---- + +== Create an alias-based policy for KEK aliases + +Create an alias based policy granting permissions to use keys aliased by the established alias naming convention. + +[source,shell] +---- +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +cat > /tmp/policy << EOF +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AliasBasedIAMPolicy", + "Effect": "Allow", + "Action": [ + "kms:Encrypt", + "kms:Decrypt", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ], + "Resource": [ + "arn:aws:kms:*:${AWS_ACCOUNT_ID}:key/*" + ], + "Condition": { + "ForAnyValue:StringLike": { + "kms:ResourceAliases": "alias/KEK_*" + } + } + } + ] +} +EOF +aws iam create-policy --policy-name KroxyliciousRecordEncryption --policy-document file:///tmp/policy +---- + +== Apply the alias-based policy to the filter's application identity + +Attach the alias policy to the filter's application identity. +This will allow the filter to perform key operations on all KEKs with aliases that match the specified naming convention. + +[source,shell] +---- +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +aws iam attach-user-policy --user-name kroxylicious --policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KroxyliciousRecordEncryption" +---- diff --git a/docs/v0.7.0/_files/modules/record-encryption/con-record-encryption-overview.adoc b/docs/v0.7.0/_files/modules/record-encryption/con-record-encryption-overview.adoc new file mode 100644 index 0000000..8d4fb52 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/con-record-encryption-overview.adoc @@ -0,0 +1,105 @@ +// file included in the following: +// +// assembly-record-encryption-filter.adoc + +[id='con-topic-encryption-overview-{context}'] += How encryption works + +[role="_abstract"] +The Record Encryption filter uses envelope encryption to encrypt records with symmetric encryption keys. +The filter encrypts records from produce requests and decrypts records from fetch responses. + +Envelope encryption:: +Envelope encryption is an industry-standard technique suited for encrypting large volumes of data in an efficient manner. +Data is encrypted with a Data Encryption Key (DEK). +The DEK is encrypted using a Key Encryption Key (KEK). +The KEK is stored securely in a Key Management System (KMS). +Symmetric encryption keys:: +AES(GCM) 256 bit encryption symmetric encryption keys are used to encrypt and decrypt record data. + +The process is as follows: + +. The filter intercepts produce requests from producing applications and transforms them by encrypting the records. +. The produce request is forwarded to the broker. +. The filter intercepts fetch responses from the broker and transforms them by decrypting the records. +. The fetch response is forwarded to the consuming application. + +The filter encrypts the record value only. +Record keys, headers, and timestamps are not encrypted. + +The entire process is transparent from the point of view of Kafka clients and Kafka brokers. +Neither are aware that the records are being encrypted, nor do they have any access to the encryption keys or have any influence on the ciphering process to secure the records. + +== How the filter encrypts records +The filter encrypts records from produce requests as follows: + +. Filter selects a KEK to apply. +. Requests the KMS to generate a DEK for the KEK. +. Uses an encrypted DEK (DEK encrypted with the KEK) to encrypt the record. +. Replaces the original record with a ciphertext record (encrypted record, encrypted DEK, and metadata). + +The filter uses a DEK reuse strategy. +Encrypted records are sent to the same topic using the same DEK until a time-out or an encryption limit is reached. + +== How the filter decrypts records +The filter decrypts records from fetch responses as follows: + +. Filter receives a cipher record from the Kafka broker. +. Reverses the process that constructed the cipher record. +. Uses KMS to decrypt the DEK. +. Uses the decrypted DEK to decrypt the encrypted record. +. Replaces the cipher record with a decrypted record. + +The filter uses an LRU (least recently used) strategy for caching decrypted DEKs. +Decrypted DEKs are kept in memory to reduce interactions with the KMS. + +== How the filter uses the KMS +To support the filter, the KMS provides the following: + +* A secure repository for storing Key Encryption Keys (KEKs) +* A service for generating and decrypting Data Encryption Keys (DEKs) + +KEKs stay within the KMS. +The KMS generates a DEK (which is securely generated random data) for a given KEK, then returns the DEK and an encrypted DEK. +The encrypted DEK has the same data but encrypted with the KEK. +The KMS doesn't store encrypted DEKs; they are stored as part of the cipher record in the broker. + +WARNING: The KMS must be available during runtime. +If the KMS is unavailable, the filter will not be able to obtain new encrypted DEKs on the produce path or decrypt encrypted DEKs on the consume path. The filter will continue to use previously obtained DEKs, but eventually, production and consumption will become impossible. +It is recommended to use the KMS in a high availability (HA) configuration. + +== Practicing key rotation + +Key rotation involves periodically replacing cryptographic keys with new ones and is considered a best practice in cryptography. + +The filter allows the rotation of Key Encryption Keys (KEKs) within the Key Management System (KMS). +When a KEK is rotated, the new key material is eventually used for newly produced records. Existing records, encrypted with older KEK versions, remain decryptable as long as the previous KEK versions are still available in the KMS. + +:fn-dek-refresh: footnote:[Assuming traffic is flowing regularly through your encrypted topic. The refresh is driven by \ +records that need encryption flowing through your topic. In cases where there are very infrequent messages flowing it is \ +possible for a DEK to be used up to 2 hours (by default) after creation.] + +When the KEK is rotated in the external KMS, it will take up to 1 hour (by default) before all{fn-dek-refresh} records produced by the filter +contain a DEK encrypted with the new key material. This is because existing encrypted DEKs are used for a configurable +amount of time after creation, the Filter caches the encrypted DEK, one hour after creation they are eligible to be refreshed. + +If you need to rotate key material immediately, execute a rolling restart of your cluster of Kroxylicious instances. + +WARNING: If an old KEK version is removed from the KMS, records encrypted with that key will become unreadable, causing fetch operations to fail. +In such cases, the consumer offset must be advanced beyond those records. + +== What part of a record is encrypted? + +The record encryption filter encrypts only the values of records, leaving record keys, headers, and timestamps untouched. +Null record values, which might represent deletions in compacted topics, are transmitted to the broker unencrypted. +This approach ensures that compacted topics function correctly. + +== Unencrypted topics + +You may configure the system so that some topics are encrypted and others are not. +This supports scenarios where topics with confidential information are encrypted and Kafka topics with non-sensitive information can be left unencrypted. + +[role="_additional-resources"] +.Additional resources + +* For more information on envelope encryption, see the link:https://www.nist.gov/publications/recommendation-key-management-part-1-general-1[NIST Recommendation for Key Management]. \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-key-creation.adoc b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-key-creation.adoc new file mode 100644 index 0000000..357f72f --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-key-creation.adoc @@ -0,0 +1,18 @@ +// file included in the following: +// +// assembly-hashicorp-vault.adoc + +[id='con-vault-key-creation-{context}'] += Creating HashiCorp Vault keys + +As the administrator, use either the HashiCorp UI or CLI to create AES-256 symmetric keys following your +key naming convention. The key type must be `aes256-gcm96`, which is Vault's default key type. + +TIP: It is recommended to use a key rotation policy. + +If using the Vault CLI, the command will look like: + +[source, shell] +---- +vault write -f transit/keys/KEK_trades type=aes256-gcm96 auto_rotate_period=90d +---- diff --git a/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-service-config.adoc b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-service-config.adoc new file mode 100644 index 0000000..a295b39 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-service-config.adoc @@ -0,0 +1,27 @@ +// file included in the following: +// +// assembly-hashicorp-vault.adoc + +[id='con-vault-service-config-{context}'] += Configuring the HashiCorp Vault KMS + +For HashiCorp Vault, the KMS configuration looks like this. Use the Vault Token and Vault Transit Engine URL values that +you gathered above. + +[source, yaml] +---- +kms: VaultKmsService # <1> +kmsConfig: + vaultTransitEngineUrl: # <2> + tls: # <3> + vaultToken: # <4> + passwordFile: /opt/vault/token + +---- +<1> Name of the KMS provider. This must be `VaultKmsService`. +<2> link:setup.adoc#_vault_transit_engine_url[Vault Transit Engine URL] including the protocol part, i.e. `https:` or `http:` +<3> (Optional) TLS trust configuration. +<4> File containing the Vault Token + +For TLS trust and TLS client authentication configuration, the filter accepts the same TLS parameters as xref:con-deploying-upstream-tls-{context}[Upstream TLS] +except the `PEM` store type is currently https://github.com/kroxylicious/kroxylicious/issues/933[not supported]. diff --git a/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-setup.adoc b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-setup.adoc new file mode 100644 index 0000000..f4a46ee --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/hashicorp-vault/con-vault-setup.adoc @@ -0,0 +1,146 @@ +// file included in the following: +// +// assembly-hashicorp-vault.adoc + +[id='con-vault-setup-{context}'] += Enable the Transit Engine + +The filter integrates with the {hashicorp-vault}/docs/secrets/transit[HashiCorp Vault *Transit +Engine*]. Vault does not enable the Transit Engine by default. It must be +{hashicorp-vault}/docs/secrets/transit#setup[enabled] before it can be used with the filter. + +[#_vault_transit_engine_url] +== Vault Transit Engine URL + +The Vault Transit Engine URL is required so the filter knows the location of the Transit Engine within the +Vault instance. + +The URL is formed from the concatenation of the `Api Address` (reported by Vault reported by during +{hashicorp-vault}/tutorials/getting-started/getting-started-dev-server#starting-the-dev-server[starts up]) with the +complete path to Transit Engine, including the name of the engine itself. If +{hashicorp-vault}/docs/enterprise/namespaces[Namespacing] is used on the Vault instance, the path needs to include the +namespace(s). The URL will end with `/transit` unless the `-path` parameter was used when +{hashicorp-vault}/docs/secrets/transit#setup[enabling the engine]. + +If namespacing is not in use, the URL will look like this: + +[source,shell] +---- +https://myvaultinstance:8200/v1/transit +---- + +If namespacing is in use, the path must include the namespaces. For example, if there is a parent namespace is `a` and +a child namespace is `b`, the URL will look like this: + +[source,shell] +---- +https://myvaultinstance:8200/v1/a/b/transit +---- + +If the name of the Transit engine was changed (using the `-path` argument to the `vault secrets enable transit` command) +the URL will look like this: + +[source,shell] +---- +https://myvaultinstance:8200/v1/mytransit +---- + +== Establish the naming convention for keys within Vault hierarchy + +Establish a naming convention for keys to keep the filter’s keys separate from those used by other systems. +Here, we use a prefix of KEK_ for filter key name. +Adjust the instructions if a different naming convention is used. + +== Role of the administrator + +To use the filter, an administrator (or administrative process) must create the encryption keys within Vault that will be used to encrypt the records. +The organization deploying the Record Encryption filter is responsible for managing this administrator or process. + +The administrator must have permissions to create keys beneath `transit/keys/KEK_*` in the Vault hierarchy. + +As a guideline, the minimal Vault policy required by the administrator is as follows: + +[source,shell] +---- +path "transit/keys/KEK_*" { +capabilities = ["read", "write"] +} +---- + +== Establish an application identity for the filter + +The filter must authenticate to Vault in order to perform envelope encryption operations, such as generating and decrypting DEKs +Therefore, a Vault identity with sufficient permissions must be created for the filter. + +Create a Vault policy for the filter: + +[source,shell] +---- +vault policy write kroxylicious_encryption_filter_policy - << EOF +path "transit/keys/KEK_*" { +capabilities = ["read"] +} +path "/transit/datakey/plaintext/KEK_*" { +capabilities = ["update"] +} +path "transit/decrypt/KEK_*" { +capabilities = [ "update"] +} +EOF +---- + +Create a {hashicorp-vault}/docs/concepts/tokens#periodic-tokens[Periodic] (long-lived) Vault Token +for the filterfootnote:[The example token create command illustrates the use of `-no-default-policy` +and `-orphan`. The use of these flags is not functionally important. You may adapt the +configuration of the token to suit the standards required by your organization]: + +[source,shell] +---- +vault token create -display-name "kroxylicious record encryption" \ + -policy=kroxylicious_encryption_filter_policy \ + -period=768h \ <1> + -no-default-policy \ <2> + -orphan <3> + +---- +<1> Causes the token to be periodic (with every renewal using the given period). +<2> Detach the "default" policy from the policy set for this token. This is done so the token has least-privilege. +<3> Create the token with no parent. This is done so that expiration of a parent won't expire the token used by the filter. + +The `token create` command yields the `token`. The `token` value is required later when configuring the vault within the +filter. + +[source] +---- +token hvs.CAESIFJ_HHo0VnnW6DSbioJ80NqmuYm2WlON-QxAPmiJScZUGh4KHGh2cy5KdkdFZUJMZmhDY0JCSVhnY2JrbUNEWnE +token_accessor 4uQZJbEnxW4YtbDBaW6yVzwP +token_policies [kroxylicious_encryption_filter_policy] +---- + +The token must be {hashicorp-vault}/docs/concepts/tokens#token-time-to-live-periodic-tokens-and-explicit-max-ttls[renewed] +before expiration. +It is the responsibility of the administrator to do this. + +This can be done with a command like: + +[source,shell] +---- +vault token renew --accessor +---- + +== Testing the application identity for the filter using the CLI + +To test whether the application identity and the policy are working correctly, a +https://raw.githubusercontent.com/kroxylicious/kroxylicious/main/scripts/validate_vault_token.sh[script] can be used. + +First, as the administrator, create a KEK in the hierarchy at this path `transit/keys/KEK_testkey`. + +[source,shell] +---- +VAULT_TOKEN= validate_vault_token.sh +---- + +The script should respond 'Ok'. If errors are reported check the policy/token configuration. + +`transit/keys/KEK_testkey` can now be removed. + diff --git a/docs/v0.7.0/_files/modules/record-encryption/proc-configuring-record-encryption-filter.adoc b/docs/v0.7.0/_files/modules/record-encryption/proc-configuring-record-encryption-filter.adoc new file mode 100644 index 0000000..2b4f1f2 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-encryption/proc-configuring-record-encryption-filter.adoc @@ -0,0 +1,60 @@ +// file included in the following: +// +// assembly-record-encryption-filter.adoc + +[id='proc-configuring-record-encryption-filter-{context}'] += Setting up the Record Encryption filter + +[role="_abstract"] +This procedure describes how to set up the Record Encryption filter. +Provide the filter configuration and the Key Encryption Key (KEK) selector to use. +The KEK selector maps topic name to key names. +The filter looks up the resulting key name in the KMS. + +.Prerequisites + +* An instance of Kroxylicious. +For information on deploying Kroxylicious, see the link:{github}[samples and examples^]. +* A config map for Kroxylicious that includes the configuration for creating virtual clusters and filters. +* A KMS is installed and set up for the filter with KEKs to encrypt records set up for topics. + +.Procedure + +. Configure a `RecordEncryption` type filter. ++ +.Example Record Encryption filter configuration +[source,yaml] +---- +filters: + - type: RecordEncryption + config: + kms: # <1> + kmsConfig: + # <2> + # ... + selector: # <3> + selectorConfig: + template: "KEK_${topicName}" # <4> + experimental: + encryptionDekRefreshAfterWriteSeconds: 3600 # <5> + encryptionDekExpireAfterWriteSeconds: 7200 # <6> + maxEncryptionsPerDek: 5000000 # <7> + # ... +---- +<1> The KMS service name. +<2> Configuration specific to the KMS provider. +<3> The Key Encryption Key (KEK) selector to use. The `${topicName}` is a literal understood by the proxy. +For example, if using the `TemplateKekSelector` with the template `KEK_$\{topicName}`, create a key for every topic that +is to be encrypted with the key name matching the topic name, prefixed by the string `KEK_`. +<4> The template for deriving the KEK, based on a specific topic name. +<5> How long after creation of a DEK before it becomes eligible for rotation. On the **next** encryption request, the cache will asynchronously create a new DEK. Encryption requests will continue to use the old DEK until the new DEK is ready. +<6> How long after creation of a DEK until it is removed from the cache. This setting puts an upper bound on how long a DEK can remain cached. +<7> The maximum number of records any DEK should be used to encrypt. After this limit is hit, that DEK will be destroyed and a new one created. + +NOTE: `encryptionDekRefreshAfterWriteSeconds` and `encryptionDekExpireAfterWriteSeconds` help govern the "originator usage period" of the DEK. That is the period of time the DEK will be used to encrypt records. Keeping the period short helps reduce the blast radius in the event that DEK key material is leaked. However, there is a trade-off. The additional KMS API calls will increase produce/consume latency and may increase your KMS provider costs. + + + + `maxEncryptionsPerDek` helps prevent key exhaustion by placing an upper limit of the amount of times that a DEK may be used to encrypt records. + +. Verify that the encryption has been applied to the specified topics by producing messages through the proxy and then consuming directly and indirectly from the Kafka cluster. + +NOTE: If the filter is unable to find the key in the KMS, the filter passes through the records belonging to that topic in the produce request without encrypting them. \ No newline at end of file diff --git a/docs/v0.7.0/_files/modules/record-validation/proc-record-validation.adoc b/docs/v0.7.0/_files/modules/record-validation/proc-record-validation.adoc new file mode 100644 index 0000000..5cda4a6 --- /dev/null +++ b/docs/v0.7.0/_files/modules/record-validation/proc-record-validation.adoc @@ -0,0 +1,94 @@ +// file included in the following: +// +// assembly-record-validation-filter.adoc + +[id='proc-record-validation-{context}'] += (Preview) Setting up the Record Validation filter + +[role="_abstract"] +This procedure describes how to set up the Record Validation filter. +Provide the filter configuration and rules that the filter uses to check against Kafka record keys and values. + +.Prerequisites + +* An instance of Kroxylicious. +For information on deploying Kroxylicious, see the link:{github}[samples and examples^]. +* A config map for Kroxylicious that includes the configuration for creating a virtual cluster. +* Apicurio Registry (if wanting to use Schema validation). + +.Procedure + +. Configure a `RecordValidation` type filter. + +[source,yaml] +---- +filters: + - type: RecordValidation + config: + rules: + - topicNames: # <1> + - + keyRule: + # <2> + valueRule: + # <3> + defaultRule: # <4> + keyRule: + # <2> + valueRule: + # <3> + forwardPartialRequests: false # <5> +---- +<1> List of topic names to which the validation rules will be applied. +<2> Validation rules that are applied to the record's key. +<3> Validation rules that are applied to the record's value. +<4> (Optional) Default rule that is applied to any topics for which there is no explict rule defined. +<5> If `false`, any record failing validation will cause the entire produce request to be rejected. + If `true`, only partitions within the request where all records validated successfully will be forwarded to the + broker. The response the client will receive will be a mixed response with response returned from the broker + for the partitions whose records passed validation and error responses for the partitions that failed validation. + If the client is using transactions, this setting is *ignored*. Any failing record validation will + cause the entire produce request to be rejected. + +Replace the token `` in the YAML configuration with either a Schema Validation rule or a JSON Syntax Validation rule depending on your requirements. + +.Example Schema Validation Rule Definition + +The Schema Validation rule validates that the key or value matches a schema identified by its global ID within an Apicurio Schema Registry. + +If the key or value does not adhere to the schema, the record will be rejected. + +Additionally, if the kafka producer has embedded a global ID within the record it will be validated against the global ID defined by the rule. If they do not match, the record will be rejected. See the +{apicurio-docs}/getting-started/assembly-using-kafka-client-serdes.html#_consumer_schema_configuration[Apicurio documentation^] for details +on how the global ID could be embedded into the record. +The filter supports extracting ID's from either the Apicurio `globalId` record header or from the initial bytes of the serialized content itself. + +[source,yaml] +---- +schemaValidationConfig: + apicurioGlobalId: 1001 # <1> + apicurioRegistryUrl: http://registry.local:8080 # <2> +allowNulls: true # <3> +allowEmpty: true # <4> +---- +<1> Apicurio registry global ID identifying the schema that will be enforced. +<2> Apicurio Registry endpoint. +<3> if `true`, the validator allows keys and or values to be `null`. The default is `false`. +<4> if `true`, the validator allows keys and or values to be empty. The default is `false`. + +NOTE: Schema validation mode currently has the capability to enforce only JSON schemas (https://github.com/kroxylicious/kroxylicious/issues/1431[issue]) + +.Example JSON Syntax Validation Rule Definition + +The JSON Syntax Validation rule validates that the key or value contains only syntactically correct JSON. + +[source,yaml] +---- +syntacticallyCorrectJson: + validateObjectKeysUnique: true # <1> +allowNulls: true # <2> +allowEmpty: true # <3> +---- +<1> If `true`, the validator enforces that objects keys must be unique. The default is `false`. +<2> if `true`, the validator allows keys and or values to be `null`. The default is `false`. +<3> if `true`, the validator allows keys and or values to be empty. The default is `false`. diff --git a/docs/v0.7.0/index.adoc b/docs/v0.7.0/index.adoc new file mode 100644 index 0000000..2ec11a3 --- /dev/null +++ b/docs/v0.7.0/index.adoc @@ -0,0 +1,5 @@ +--- +title: Kroxylicious Proxy v0.7.0 +--- + +include::_files/index.adoc[leveloffset=0]