From 655fe82a95f7eb7c7771062457178297ad516d7d Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Tue, 19 Oct 2021 04:31:52 -0400 Subject: [PATCH] reverseproxy: New `copy_response` handler for `handle_response` routes Followup to #4298 and #4388. This adds a new `copy_response` handler which may only be used in `reverse_proxy`'s `handle_response` routes, which can be used to actually copy the proxy response downstream. Previously, if `handle_response` was used (with routes, not the status code mode), it was impossible to use the upstream's response body at all, because we would always close the body, expecting the routes to write a new body from scratch. To implement this, I had to refactor `h.reverseProxy()` to move all the code that came after the `HandleResponse` loop into a new function. This new function `h.finalizeResponse()` takes care of preparing the response by removing extra headers, dealing with trailers, then copying the headers and body downstream. Since basically what we want `copy_response` to do is invoke `h.finalizeResponse()` at a configurable point in time, we need to pass down the proxy handler, the response, and some other state via a new `req.WithContext(ctx)`. Wrapping a new context is pretty much the only way we have to jump a few layers in the HTTP middleware chain and let a handler pick up this information. Feels a bit dirty, but it works. Also fixed a bug with the `http.reverse_proxy.upstream.duration` placeholder, it always had the same duration as `http.reverse_proxy.upstream.latency`, but the former was meant to be the time taken for the roundtrip _plus_ copying/writing the response. --- caddyconfig/httpcaddyfile/directives.go | 1 + .../reverse_proxy_handle_response.txt | 17 +++- modules/caddyhttp/reverseproxy/caddyfile.go | 41 +++++++++ .../caddyhttp/reverseproxy/copyresponse.go | 88 +++++++++++++++++++ .../caddyhttp/reverseproxy/reverseproxy.go | 80 +++++++++++++++-- 5 files changed, 219 insertions(+), 8 deletions(-) create mode 100644 modules/caddyhttp/reverseproxy/copyresponse.go diff --git a/caddyconfig/httpcaddyfile/directives.go b/caddyconfig/httpcaddyfile/directives.go index 9da205e2ec6..faa2e5a04bf 100644 --- a/caddyconfig/httpcaddyfile/directives.go +++ b/caddyconfig/httpcaddyfile/directives.go @@ -65,6 +65,7 @@ var directiveOrder = []string{ // handlers that typically respond to requests "abort", "error", + "copy_response", "respond", "metrics", "reverse_proxy", diff --git a/caddytest/integration/caddyfile_adapt/reverse_proxy_handle_response.txt b/caddytest/integration/caddyfile_adapt/reverse_proxy_handle_response.txt index 35d9631a78c..cf25d56ba66 100644 --- a/caddytest/integration/caddyfile_adapt/reverse_proxy_handle_response.txt +++ b/caddytest/integration/caddyfile_adapt/reverse_proxy_handle_response.txt @@ -19,7 +19,8 @@ reverse_proxy 127.0.0.1:65535 { } handle_response { - respond "Any! This should be last in the JSON!" + header Foo "This should be last in the JSON!" + copy_response 404 } @403 { @@ -168,8 +169,18 @@ reverse_proxy 127.0.0.1:65535 { { "handle": [ { - "body": "Any! This should be last in the JSON!", - "handler": "static_response" + "handler": "headers", + "response": { + "set": { + "Foo": [ + "This should be last in the JSON!" + ] + } + } + }, + { + "handler": "copy_response", + "status_code": 404 } ] } diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go index c7f555f8a44..a9220088e9b 100644 --- a/modules/caddyhttp/reverseproxy/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/caddyfile.go @@ -34,6 +34,7 @@ import ( func init() { httpcaddyfile.RegisterHandlerDirective("reverse_proxy", parseCaddyfile) + httpcaddyfile.RegisterHandlerDirective("copy_response", parseCopyResponseCaddyfile) } func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { @@ -1024,6 +1025,46 @@ func (h *HTTPTransport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { return nil } +func parseCopyResponseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { + crh := new(CopyResponseHandler) + err := crh.UnmarshalCaddyfile(h.Dispenser) + if err != nil { + return nil, err + } + return crh, nil +} + +// UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax: +// +// copy_response [] [] { +// status +// } +// +func (h *CopyResponseHandler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { + for d.Next() { + args := d.RemainingArgs() + if len(args) == 1 { + if num, err := strconv.Atoi(args[0]); err == nil && num > 0 { + h.StatusCode = caddyhttp.WeakString(args[0]) + break + } + } + + for d.NextBlock(0) { + switch d.Val() { + case "status": + if !d.NextArg() { + return d.ArgErr() + } + h.StatusCode = caddyhttp.WeakString(d.Val()) + default: + return d.Errf("unrecognized subdirective '%s'", d.Val()) + } + } + } + return nil +} + const matcherPrefix = "@" // Interface guards diff --git a/modules/caddyhttp/reverseproxy/copyresponse.go b/modules/caddyhttp/reverseproxy/copyresponse.go new file mode 100644 index 00000000000..d7ac7f7c951 --- /dev/null +++ b/modules/caddyhttp/reverseproxy/copyresponse.go @@ -0,0 +1,88 @@ +// Copyright 2015 Matthew Holt and The Caddy Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reverseproxy + +import ( + "fmt" + "net/http" + "strconv" + + "github.com/caddyserver/caddy/v2" + "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" + "github.com/caddyserver/caddy/v2/modules/caddyhttp" +) + +func init() { + caddy.RegisterModule(CopyResponseHandler{}) +} + +// CopyResponseHandler is a special HTTP handler which may only be used +// within reverse_proxy's handle_response routes, to copy the response +// from the +type CopyResponseHandler struct { + // To write the upstream response's body but with a different + // status code, set this field to the desired status code. + StatusCode caddyhttp.WeakString `json:"status_code,omitempty"` + + ctx caddy.Context +} + +// CaddyModule returns the Caddy module information. +func (CopyResponseHandler) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "http.handlers.copy_response", + New: func() caddy.Module { return new(CopyResponseHandler) }, + } +} + +// Provision ensures that h is set up properly before use. +func (h *CopyResponseHandler) Provision(ctx caddy.Context) error { + h.ctx = ctx + return nil +} + +func (h CopyResponseHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request, _ caddyhttp.Handler) error { + repl := req.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) + hrc, ok := req.Context().Value(proxyHandleResponseContextCtxKey).(*handleResponseContext) + + // don't allow this to be used outside of handle_response routes + if !ok { + return caddyhttp.Error(http.StatusInternalServerError, + fmt.Errorf("cannot use 'copy_response' outside of reverse_proxy's handle_response routes")) + } + + // allow a custom status code to be written; otherwise the + // status code from the upstream resposne is written + if codeStr := h.StatusCode.String(); codeStr != "" { + intVal, err := strconv.Atoi(repl.ReplaceAll(codeStr, "")) + if err != nil { + return caddyhttp.Error(http.StatusInternalServerError, err) + } + hrc.response.StatusCode = intVal + } + + // make sure the reverse_proxy handler doesn't try to call + // finalizeResponse again after we've already done it here. + hrc.isFinalized = true + + // write the response + return hrc.handler.finalizeResponse(rw, req, hrc.response, repl, hrc.start, hrc.logger, false) +} + +// Interface guards +var ( + _ caddyhttp.MiddlewareHandler = (*CopyResponseHandler)(nil) + _ caddyfile.Unmarshaler = (*CopyResponseHandler)(nil) +) diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go index 36dfbfe641e..fe53447e3af 100644 --- a/modules/caddyhttp/reverseproxy/reverseproxy.go +++ b/modules/caddyhttp/reverseproxy/reverseproxy.go @@ -667,12 +667,33 @@ func (h *Handler) reverseProxy(rw http.ResponseWriter, req *http.Request, repl * h.logger.Debug("handling response", zap.Int("handler", i)) + // we make some data available via request context to child routes + // so that they may inherit some options and functions from the + // handler, and be able to copy the response. + hrc := &handleResponseContext{ + handler: h, + response: res, + start: start, + logger: logger, + } + ctx := req.Context() + ctx = context.WithValue(ctx, proxyHandleResponseContextCtxKey, hrc) + // pass the request through the response handler routes - routeErr := rh.Routes.Compile(next).ServeHTTP(rw, req) + routeErr := rh.Routes.Compile(next).ServeHTTP(rw, req.WithContext(ctx)) + + // if the response handler routes already finalized the response, + // we can return early. It should be finalized if the routes executed + // included a copy_response handler. If a fresh response was written + // by the routes instead, then we still need to finalize the response + // without copying the body. + if routeErr == nil && hrc.isFinalized { + return nil + } - // always close the response body afterwards since it's expected + // always close the response body afterwards, since it's expected // that the response handler routes will have written to the - // response writer with a new body + // response writer with a new body, if it wasn't already finalized. res.Body.Close() bodyClosed = true @@ -681,8 +702,25 @@ func (h *Handler) reverseProxy(rw http.ResponseWriter, req *http.Request, repl * // the roundtrip was successful and to not retry return roundtripSucceeded{routeErr} } + + // we've already closed the body, so there's no use allowing + // another response handler to run as well + break } + return h.finalizeResponse(rw, req, res, repl, start, logger, bodyClosed) +} + +// finalizeResponse prepares and copies the response. +func (h Handler) finalizeResponse( + rw http.ResponseWriter, + req *http.Request, + res *http.Response, + repl *caddy.Replacer, + start time.Time, + logger *zap.Logger, + bodyClosed bool, +) error { // deal with 101 Switching Protocols responses: (WebSocket, h2c, etc) if res.StatusCode == http.StatusSwitchingProtocols { h.handleUpgradeResponse(logger, rw, req, res) @@ -718,7 +756,7 @@ func (h *Handler) reverseProxy(rw http.ResponseWriter, req *http.Request, repl * rw.WriteHeader(res.StatusCode) if !bodyClosed { - err = h.copyResponse(rw, res.Body, h.flushInterval(req, res)) + err := h.copyResponse(rw, res.Body, h.flushInterval(req, res)) res.Body.Close() // close now, instead of defer, to populate res.Trailer if err != nil { // we're streaming the response and we've already written headers, so @@ -740,7 +778,7 @@ func (h *Handler) reverseProxy(rw http.ResponseWriter, req *http.Request, repl * } // total duration spent proxying, including writing response body - repl.Set("http.reverse_proxy.upstream.duration", duration) + repl.Set("http.reverse_proxy.upstream.duration", time.Since(start)) if len(res.Trailer) == announcedTrailers { copyHeader(rw.Header(), res.Trailer) @@ -984,6 +1022,38 @@ var bufPool = sync.Pool{ }, } +// HandleResponseContext carries some contextual information about the +// the current proxy handling. +type handleResponseContext struct { + // handler is the active proxy handler instance, so that + // routes like copy_response may inherit some config + // options and have access to handler methods. + handler *Handler + + // response is the actual response received from the proxy + // roundtrip, to potentially be copied if a copy_response + // handler is in the handle_response routes. + response *http.Response + + // start is the time just before the proxy roundtrip was + // performed, used for logging. + start time.Time + + // logger is the prepared logger which is used to write logs + // with the request, duration, and selected upstream attached. + logger *zap.Logger + + // isFinalized is whether the response has been finalized, + // i.e. copied and closed, to make sure that it doesn't + // happen twice. + isFinalized bool +} + +// proxyHandleResponseContextCtxKey is the context key for the active proxy handler +// so that handle_response routes can inherit some config options +// from the proxy handler. +const proxyHandleResponseContextCtxKey caddy.CtxKey = "reverse_proxy_handle_response_context" + // Interface guards var ( _ caddy.Provisioner = (*Handler)(nil)