From 86eb2b96a848206941ab2f73c186c5dcf421e67a Mon Sep 17 00:00:00 2001 From: Kevin Delgado Date: Thu, 9 Sep 2021 10:16:11 -0700 Subject: [PATCH] KEP-2885: Add Server-Side Unknown Field Validation KEP (#2886) * First draft - server-side unknown field validation * Respond to smarterclayton feedback on mergeMap and param advantages * add JSON Patch POC * Update TOC to fix CI * Update PRR * Remove feature flag, add prod-readiness, set to implementable * Add feature gate * Add note about client-go support * PRR feedback * Update TOC * rename query param * Update test plan * Address feedback around warning, update client-go option name * Warn note * spelling * Add support for warn option --- .../sig-api-machinery/2885.yaml | 3 + .../README.md | 963 ++++++++++++++++++ .../kep.yaml | 49 + 3 files changed, 1015 insertions(+) create mode 100644 keps/prod-readiness/sig-api-machinery/2885.yaml create mode 100644 keps/sig-api-machinery/2885-server-side-unknown-field-validation/README.md create mode 100644 keps/sig-api-machinery/2885-server-side-unknown-field-validation/kep.yaml diff --git a/keps/prod-readiness/sig-api-machinery/2885.yaml b/keps/prod-readiness/sig-api-machinery/2885.yaml new file mode 100644 index 00000000000..e4c8157dcbd --- /dev/null +++ b/keps/prod-readiness/sig-api-machinery/2885.yaml @@ -0,0 +1,3 @@ +kep-number: 2885 +alpha: + approver: "@deads2k" diff --git a/keps/sig-api-machinery/2885-server-side-unknown-field-validation/README.md b/keps/sig-api-machinery/2885-server-side-unknown-field-validation/README.md new file mode 100644 index 00000000000..75a78fafc93 --- /dev/null +++ b/keps/sig-api-machinery/2885-server-side-unknown-field-validation/README.md @@ -0,0 +1,963 @@ + +# KEP-2885: Server Side Unknown Field Validation + + + + + + +- [Release Signoff Checklist](#release-signoff-checklist) +- [Summary](#summary) +- [Motivation](#motivation) + - [Goals](#goals) + - [Non-Goals](#non-goals) +- [Proposal](#proposal) + - [Notes/Constraints/Caveats (Optional)](#notesconstraintscaveats-optional) + - [Future Work](#future-work) + - [Risks and Mitigations](#risks-and-mitigations) + - [Performance Considerations](#performance-considerations) +- [Design Details](#design-details) + - [Opt-in API Mechanism](#opt-in-api-mechanism) + - [Content-Type Header](#content-type-header) + - [Query Parameter](#query-parameter) + - [Create (POST) and Update (PUT)](#create-post-and-update-put) + - [Patch (PATCH)](#patch-patch) + - [JSON Patch](#json-patch) + - [Strategic Merge Patch](#strategic-merge-patch) + - [Apply Patch](#apply-patch) + - [Test Plan](#test-plan) + - [Graduation Criteria](#graduation-criteria) + - [Alpha](#alpha) +- [Production Readiness Review Questionnaire](#production-readiness-review-questionnaire) + - [Feature Enablement and Rollback](#feature-enablement-and-rollback) + - [Rollout, Upgrade and Rollback Planning](#rollout-upgrade-and-rollback-planning) + - [Monitoring Requirements](#monitoring-requirements) + - [Dependencies](#dependencies) + - [Scalability](#scalability) +- [Implementation History](#implementation-history) +- [Alternatives](#alternatives) + + +## Release Signoff Checklist + + + +Items marked with (R) are required *prior to targeting to a milestone / release*. + +- [x] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR) +- [x] (R) KEP approvers have approved the KEP status as `implementable` +- [x] (R) Design details are appropriately documented +- [x] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors) + - [ ] e2e Tests for all Beta API Operations (endpoints) + - [ ] (R) Ensure GA e2e tests for meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) + - [ ] (R) Minimum Two Week Window for GA e2e tests to prove flake free +- [x] (R) Graduation criteria is in place + - [ ] (R) [all GA Endpoints](https://github.com/kubernetes/community/pull/1806) must be hit by [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) +- [x] (R) Production readiness review completed +- [x] (R) Production readiness review approved +- [x] "Implementation History" section is up-to-date for milestone +- [ ] User-facing documentation has been created in [kubernetes/website], for publication to [kubernetes.io] +- [ ] Supporting documentation—e.g., additional design documents, links to mailing list discussions/SIG meetings, relevant PRs/issues, release notes + + + +[kubernetes.io]: https://kubernetes.io/ +[kubernetes/enhancements]: https://git.k8s.io/enhancements +[kubernetes/kubernetes]: https://git.k8s.io/kubernetes +[kubernetes/website]: https://git.k8s.io/website + +## Summary + + +As a client sending a create, update, or patch request to the server, I want to +be able to instruct the server to fail when the kubernetes object I send has +fields that are not valid fields of the kubernetes resource. + +This will allow us to remove client-side validation from kubectl while +maintaining the same core functionality of erroring out on requests that contain +unknown or invalid fields. + +## Motivation + + +`kubectl –validate=true` is the current mechanism to indicate that a request +should fail if it specifies unknown fields on the object. + +There are a few issues with this as highlighted by the [previous +effort](https://docs.google.com/document/d/18nrtJ0gizVHnIhx5NIkGXvSaQ1wT2i6XVDUmmQqc7_4/edit#heading=h.q9616qdce0l9) to do +server-side validation, primarily these issues include: +* Bug Fixes are utilized slower because client-side upgrades are hard to get in +people’s hands. +* Each client needs to implement validation. + +This is the last remaining step for removing client-side validation according to +[this +comment](https://github.com/kubernetes/kubernetes/issues/39434#issuecomment-270486105) +* Client-side validation is [very + painful](https://github.com/kubernetes/kubernetes/issues/39434#issuecomment-270443399) + +Additional problems have been highlighted in the relevant github issues: +* https://github.com/kubernetes/kubernetes/issues/39434 +* https://github.com/kubernetes/kubernetes/issues/5889 +The community has been asking for this feature as recently as [August 8th, +2021](https://github.com/kubernetes/kubernetes/issues/104090#issuecomment-895228540) + +### Goals + + +* Server should validate that no extra fields are present or invalid (e.g. +misspelled), nor are any fields duplicated (for json and yaml data). +* We must maintain compatibility with all existing clients, thus server side +unknown field validation should be opt-in + +### Non-Goals + + +* Complete (business-logic) server-side validation of every aspect of an object +(i.e. we’re only focused on mismatched fields between the object in the request +body and its schema). +* Protobuf support. Theoretically, unknown protobuf fields could occur if clients +on version X send a request to the server of version less than X which does not recognize +some of the fields (or a similar situation). We do not think it is worth +supporting this use case initially and will error if clients attempt to validate +schema server-side with protobuf data. + + +## Proposal + + +We propose using an opt-in API mechanism (such as content-type header or query +param) to indicate to the server that it should fail when the kubernetes object +in the request body supplied to POST, PUT, and PATCH requests contains +extra/unknown fields. + +Clients such as kubectl will continue to use the `--validate=true` flag as +before, but instead of triggering validation on the client-side, it will +instruct the server to validate for unknown fields server-side. + +This change will be made in at least two steps, one where we introduce the +server side validation and a second where we modify kubectl to use the +server-side validation (and mark the existing client-side validation as +deprecated). + +### Notes/Constraints/Caveats (Optional) + +#### Future Work +After server side unknown field validation is implemented we can begin work to +deprecate and remove client side validate and have kubectl use server side +validation instead. A separate KEP will be published for that. + + + +### Risks and Mitigations + + + +### Performance Considerations +It is worth noting that checking for unknown fields whether via a json unmarshal +that explicitly breaks if it encounters unknown fields (as in the case of +create, update, or json patch) or extra logic around a merge step (as in +strategic merge patch or apply) will be less performant than not doing so. +[Initial +benchmarks](https://github.com/kubernetes/kubernetes/pull/104433#issuecomment-901398507) estimate ~20% slower 25-30% more memory consumption. + +This is deemed acceptable and insignificant because the use case we are +replacing is client side validation that happens from kubectl (i.e. via a human +that should not be too impacted by a slight performance hit). Any existing +automated clients that would be impacted by performance do not currently have a +way of leveraging server side validation and thus will remain opted-out of this +by default (or will be willing to tradeoff the performance if they do choose to +opt-in to server side schema validation). + + +## Design Details + + +### Opt-in API Mechanism + +There are a few ways we could allow a client to opt-in to server-side unknown +field validation, we present two options **Content-Type Header** and **query +parameter**. + +#### Content-Type Header + +Requests that contain a kubernetes object, also pass along with it a +content-type header such as “application/json”. One way to indicate to the +server that it should use strict schema validation and fail when unknown fields +are passed is to send a mime-type parameter indicating the strict validation +such as “application/json;validation=strict”. Alternatively we could use a new +header such as “X-Kubernetes-Validation:strict”. + +One could argue that this is the more appropriate way to parameterize opting-in +to server side schema validation because on the server we are fundamentally +treating the content as a different type (“application/json” is json data that +we don’t care if it has extra fields, “application/json;validation=strict” is +json data that is sensitive to extra fields). + +A precedent for using a mime-type parameter is from how we [receive resources as +tables](https://kubernetes.io/docs/reference/using-api/_print/#receiving-resources-as-tables). + +On the other hand, one could also argue that interpreting input strictly or not +according to the target schema is independent of the content type and that this +is an inappropriate way to parameterize validation opt-in. + +Another argument against this method is that for patch, strictness is handled +when decoding the *result* of the patch, so it wouldn’t make sense to use +Content-Type which is providing information about the inbound request body. + +#### Query Parameter + +Alternatively, if we don’t like the idea of using Content-Type header to +determine whether the apiserver should accept or fail on unknown fields, we +could pass a query param such as “?fieldValidation=Strict”. + +This might make it more obvious to consumers of the API that strict schema +validation is a choice of the client. On the other hand, query parameters are +more typically used for filtering/sorting data returned from the API server. + +Arguments for using query parameter include: +* Being able version them with the API version (unlike content-type which must + be versioned with the type) +* Parameters are more discoverable in openapi, less so for mime types. +* We have precedent for using query parameters for write requests already (via CreateOptions, + PatchOptions, UpdateOptions) + +For client-go support, we will add a `FieldValidation` field to +CreateOptions, PatchOptions, and UpdateOptions that can supply the query +parameter to the request. + +In addition to supporting the options `Strict` (for erroring on unknown fields) and `Ignore`, +we will also support the `?fieldValidation=Warn` option, to warn, rather than error on +unknown fields. + +We believe that using a query parameter is the best approach. + +### Create (POST) and Update (PUT) + +Implementation of this validation for Patch requests differ significantly and +are discussed in the Patch section below. + +At a high level, for create and update requests we have the opportunity to +validate the object when we unmarshal the object from wire format into the go +type. + +This happens in the [serializer +implementation](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apimachinery/pkg/runtime/serializer/json/json.go#L264-L291), where depending on if a “strict” +option is set (or not), the unmarshalling step will error (or not) if +extra/malformatted fields exist on the data being unmarshalled. + +This in turn, gets used by +[create](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/create.go#L95-L120) and [update](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/update.go#L106-L107) handlers when they attempt to +decode the request body. + +One major advantage of the “Content-Type” header approach is that it requires +little to no change existing code in the request handlers. + +When the API server is started and we create the [negotiated +serializer](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/server/genericapiserver.go#L616), we +create a [separate serializer](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apimachinery/pkg/runtime/serializer/codec_factory.go#L167) for each content type/media type. To support strict +validation all we would need to do is add [another +serializer](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apimachinery/pkg/runtime/serializer/codec_factory.go#L51-L106) to the +NegotiatedSerializer for strict validation that is identical to the serializer +of “application/json” but has the “strict” option set (as well as ones for +yaml). + +The request handler will then proceed unchanged, and the [Decode +step](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/create.go#L120) will fail +if it encounters fields in the request body that are not part of the schema. + +### Patch (PATCH) + +Unlike create and update requests, patch requests are more complicated because +they do not ever unmarshal/decode the request body into a kubernetes object. +Instead patching involves, serializing the existing object to json, then using a +jsonpatch or mergepatch library to combine the patch sent from the client with +the existing object, and finally deserializing the combined object into the +kubernetes object. + +During the jsonpatch or mergepatch step we don’t currently check if the patch +sent by the client has any erroneous fields. Now we need to. + +This all happens in the +[patchMechanism](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L293-L296) used by the patch handler. + +There are three types of patchers used by the patch handler: jsonPatcher, +smpPatcher, and applyPatcher that are each discussed individually. + +#### JSON Patch + +JSON patching is most commonly called when client-side apply looks to patch an +existing custom resource. + +For the jsonPatch implementation of +[applyPatchToCurrentObject](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L304), This is similar +to create and update where after using the jsonpatch library to generate the +[patchedObjJS](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L312) we then call [DecodeInto](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L319). + +Based on the presence of the validate query param, we should be able to use a +codec that is or is not doing strict decoding that will fail, in the strict +case, when invalid fields are present on the patched json blob. + +Currently, like create and update, the handler generates the [serializer from the +scope](https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L136-L146) and the codec from the serializer. We just need to ensure that it +conditionally uses a StrictSerializer based on the presence of the validate +query param. + +#### Strategic Merge Patch + +Strategic merge patching is most commonly called when client-side apply attempts +to patch an existing object of a builtin type. + +For the strategic merge patch (SMP) implementation, it calls into the +apimachinery strategicpatch library to update the fields of the original object +with the data from the patch. This goes through a few layers but it eventually +boils down to this +[mergeMap](https://github.com/kubernetes/kubernetes/blob/dadecb2c8932fd28de9dfb94edbc7bdac7d0d28f/staging/src/k8s.io/apimachinery/pkg/util/strategicpatch/patch.go#L1280) call that does the actual merge of the patch into +the original. + +`mergeMap` goest field-by-field through the existing object, checking to see if +data should be merged in from the patch. Currently, if there are any +extra/unknown fields in patch, they will never be encountered and will just be +ignored. + +`mergeMap` takes a mergeOptions argument that we can update to have some +configuration for strict validation. Within `mergeMap`, we can track the fields +of the patch as they are meged in the existing object. After all fields that can +be merged from the patch are, we will know that any fields on the patch that +were not merged are unknown fields and can error out. + +#### Apply Patch + +Apply Patching is called whenever we send a server side apply request to the +server. + +For apply patch, server side schema validation already occurs and is not +optional, no new behavior is needed here. The exception to this is that one can +create an arbitrary schema that is still a valid structural schema (see [docs](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#specifying-a-structural-schema) on +structural schema). If one manages to do this and permits unknown fields, then +server-side schema validation will not be responsible for invalidating these. + +For further context, the applyPatch path of the patch handler calls into the +fieldmanager’s [Apply](https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/patch.go#L441) method in order to generate the newly patched object that +will be stored to etcd + +This in turn, calls +[apply](https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/fieldmanager/structuredmerge.go#L106) on it’s own internal fieldmanager (which is what +actually implements the joining of the live object to the patch object to +produce the new object). It attempts to [convert the patch object into a +TypedValue](https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/fieldmanager/structuredmerge.go#L129). This checks the object against its schema and will error if the +object has any unknown or duplicate fields. + + +### Test Plan + + +We can unit test and benchmark the performance of each endpoint in +[apiserver/pkg/endpoints/apiserver_test.go](https://github.com/kubernetes/kubernetes/blob/dadecb2c8932fd28de9dfb94edbc7bdac7d0d28f/staging/src/k8s.io/apiserver/pkg/endpoints/apiserver_test.go). We will need to test that requests +fail with invalid fields for all types of fields (embedding, free-form fields, +etc). + +Additionally, we can add integration testing for all endpoints in +[test/integration/apiserver/apiserver_test.go](https://github.com/kubernetes/kubernetes/blob/master/test/integration/apiserver/apiserver_test.go) + +We will also have additional testing for changes to the strategic merge patch +logic in +[apimachinery/pkg/util/strategicpatch/patch_test.go](https://github.com/kubernetes/kubernetes/blob/dadecb2c8932fd28de9dfb94edbc7bdac7d0d28f/staging/src/k8s.io/apimachinery/pkg/util/strategicpatch/patch_test.go) + +### Graduation Criteria + +#### Alpha + +- Feature implemented behind a feature flag +- Integration tests added for all relevant verbs (POST, PUT, and PATCH) + + + +## Production Readiness Review Questionnaire + + + +### Feature Enablement and Rollback + + + +###### How can this feature be enabled / disabled in a live cluster? + + +- [x] Feature gate (also fill in values in `kep.yaml`) + - Feature gate name: UnknownFieldValidation + - Components depending on the feature gate: kube-apiserver +- [x] Other + - Describe the mechanism: query parameter + - Will enabling / disabling the feature require downtime of the control + plane? NO + - Will enabling / disabling the feature require downtime or reprovisioning + of a node? (Do not assume `Dynamic Kubelet Config` feature is enabled). NO + +###### Does enabling the feature change any default behavior? + +No, strict validation is false by default. + +###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? + +Yes. From the cluster operator's side, they can restart the kube-apiserver without +the UnknownFieldValidation flag set and this will disable the feature +cluster-wide. + +For end-users that no longer wish to perform server-side strict validation, +simply not setting the query param will ensure the server is not doing strict +validation. + +###### What happens if we reenable the feature if it was previously rolled back? + +No harm, requests will resume performing strict validation. The content of +stored data does not change when the feature is used compared to when it is off, +only new requests to the api-server will trigger strict validation. + +###### Are there any tests for feature enablement/disablement? + +Testing for both presence and absence of query param. + +### Rollout, Upgrade and Rollback Planning + +This section must be completed when targeting beta to a release. + +N/A + + +### Monitoring Requirements + +This section must be completed when targeting beta to a release. + + +###### How can someone using this feature know that it is working for their instance? + +The easiest way to know that the feature is enabled and working is to query the +API server with the query param set and passing an object with deliberately +invalid, unknown fields. An error from the server will indicate that the feature +is enabled as expected. + +Alternatively, and end-user can cross-reference the results of client-side +validation (i.e. `kubectl --validate=true`) with the results of server-side. If +client-side validation indicates an error but server-side does not, then the +feature is disabled. + + + +###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? + +Users should expect to see an increase in request latency and memory usage +(~20-25% increase) for requests that desire server side schema validation. + +Cluster operators can also use cpu usage monitoring to determine whether +increased usage of server-side schema validation dramatically increases CPU usage after feature enablement. + + +###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? + +Pick one more of these and delete the rest. + +- [x] Metrics + - Metric name: CPU Usage + - Components exposing the metric: kube-apiserver +- [x] Metrics + - Metric name: Memory Consumption + - Components exposing the metric: kube-apiserver +- [x] Metrics + - Metric name: Request Latency + - Components exposing the metric: kube-apiserver + + +### Dependencies + +This section must be completed when targeting beta to a release. + +N/A + + + +### Scalability + + + +###### Will enabling / using this feature result in increasing time taken by any operations covered by existing SLIs/SLOs? + + +Mutating API calls that opt-in to validation will be slower ([initial +benchmarks](https://github.com/kubernetes/kubernetes/pull/104433#issuecomment-901398507) +estimate ~20% slower 25-30% more memory consumption) + + + +## Implementation History + + +* Proof of Concept [PR](https://github.com/kubernetes/kubernetes/pull/104433) for Create and Update. +* Proof of Concept [PR](https://github.com/kubernetes/kubernetes/pull/104619) for JSON Patch. + + + +## Alternatives +* Content-Type Header vs Query Param (see [#proposal](#proposal)) +* Passing multiple decoders around in the request scope +* Change the Decode signature itself (or adding a new DecodeStrict that Decode + calls into) +* Instead of erroring on unknown fields, we could warn on unknown fields and + then more generally have a `TreatWarningsAsErrors` option to fail on requests + with unknown fields. One drawback here is that it would be difficult to + flag unknown fields separately from other warnings that we might not want to + error on (e.g. using a deprecated API). See note in [#proposal](#proposal) + section around adding an option for warning on unknown fields. + + + + diff --git a/keps/sig-api-machinery/2885-server-side-unknown-field-validation/kep.yaml b/keps/sig-api-machinery/2885-server-side-unknown-field-validation/kep.yaml new file mode 100644 index 00000000000..7415430ab59 --- /dev/null +++ b/keps/sig-api-machinery/2885-server-side-unknown-field-validation/kep.yaml @@ -0,0 +1,49 @@ +title: Server Side Unknown Field Validation +kep-number: 2885 +authors: + - "@kevindelgado" +owning-sig: sig-api-machinery +participating-sigs: +status: implementable +creation-date: 2021-08-20 +reviewers: + - "@apelisse" + - "@jpbetz" + - "@smarterclayton" +approvers: + - "@deads2k" + +##### WARNING !!! ###### +# prr-approvers has been moved to its own location +# You should create your own in keps/prod-readiness +# Please make a copy of keps/prod-readiness/template/nnnn.yaml +# to keps/prod-readiness/sig-xxxxx/00000.yaml (replace with kep number) +#prr-approvers: + +see-also: +replaces: + +# The target maturity stage in the current dev cycle for this KEP. +stage: alpha + +# The most recent milestone for which work toward delivery of this KEP has been +# done. This can be the current (upcoming) milestone, if it is being actively +# worked on. +latest-milestone: "v1.23" + +# The milestone at which this feature was, or is targeted to be, at each stage. +milestone: + alpha: "v1.23" + beta: "v1.24" + stable: "v1.25" + +# The following PRR answers are required at alpha release +# List the feature gate name and the components for which it must be enabled +feature-gates: + - name: UnknownFieldValidation + components: + - kube-apiserver +disable-supported: true + +# The following PRR answers are required at beta release +metrics: