Skip to content

Commit

Permalink
Check Content-Length against minSize and passthrough writes if no gzip (
Browse files Browse the repository at this point in the history
nytimes#71)

Currently there's no way to generate a HEAD response with the correct
headers as the GET unless you set the minSize as 0, but then gzip headers
will be written in Close. Instead, allow a Write(nil) that will set the
correct headers based on the Content-Length/Content-Type headers and only
initialize a writer if there is a non-zero-length Write. If the
Content-Length cannot be determined, you cannot generate the response
because it cannot know if minSize would've been met.

Additionally, if we determined that the request should not be compressed
we should passthrough writes immediately rather than waiting until
Close.

Fixes nytimes#70
Fixes nytimes#64
  • Loading branch information
jameshartig authored and jprobinson committed Aug 14, 2018
1 parent 5032c88 commit c551b6c
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 72 deletions.
114 changes: 78 additions & 36 deletions gzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const (
// The examples seem to indicate that it is.
DefaultQValue = 1.0

// DefaultMinSize is the default minimum size until we enable gzip compression.
// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
Expand Down Expand Up @@ -82,6 +83,7 @@ type GzipResponseWriter struct {

minSize int // Specifed the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
buf []byte // Holds the first part of the write before reaching the minSize or the end of the write.
ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter.

contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
}
Expand All @@ -96,38 +98,56 @@ func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {

// Write appends data to the gzip writer.
func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// If content type is not set.
if _, ok := w.Header()[contentType]; !ok {
// It infer it from the uncompressed body.
w.Header().Set(contentType, http.DetectContentType(b))
}

// GZIP responseWriter is initialized. Use the GZIP responseWriter.
if w.gw != nil {
n, err := w.gw.Write(b)
return n, err
return w.gw.Write(b)
}

// If we have already decided not to use GZIP, immediately passthrough.
if w.ignore {
return w.ResponseWriter.Write(b)
}

// Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
// On the first write, w.buf changes from nil to a valid slice
w.buf = append(w.buf, b...)

// If the global writes are bigger than the minSize and we're about to write
// a response containing a content type we want to handle, enable
// compression.
if len(w.buf) >= w.minSize && handleContentType(w.contentTypes, w) && w.Header().Get(contentEncoding) == "" {
err := w.startGzip()
if err != nil {
return 0, err
}
// If they provided a Content-Length, store that to check against minSize.
var cl int
if cls := w.Header().Get(contentLength); cls != "" {
cl, _ = strconv.Atoi(cls)
}

// Only continue if they didn't already choose an encoding.
if w.Header().Get(contentEncoding) == "" {
// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
if len(w.buf) < w.minSize && cl == 0 {
return len(b), nil
}
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if cl >= w.minSize || len(w.buf) >= w.minSize {
// If a Content-Type wasn't specified, infer it from the current buffer.
if _, ok := w.Header()[contentType]; !ok {
w.Header().Set(contentType, http.DetectContentType(w.buf))
}
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
if handleContentType(w.contentTypes, w) {
if err := w.startGzip(); err != nil {
return 0, err
}
return len(b), nil
}
}
}
// If we got here, we should not GZIP this response.
if err := w.startPlain(); err != nil {
return 0, err
}
return len(b), nil
}

// startGzip initialize any GZIP specific informations.
// startGzip initializes a GZIP writer and writes the buffer.
func (w *GzipResponseWriter) startGzip() error {

// Set the GZIP header.
w.Header().Set(contentEncoding, "gzip")

Expand All @@ -141,20 +161,43 @@ func (w *GzipResponseWriter) startGzip() error {
w.ResponseWriter.WriteHeader(w.code)
}

// Initialize the GZIP response.
w.init()
// Initialize and flush the buffer into the gzip response if there are any bytes.
// If there aren't any, we shouldn't initialize it yet because on Close it will
// write the gzip header even if nothing was ever written.
if len(w.buf) > 0 {
// Initialize the GZIP response.
w.init()
n, err := w.gw.Write(w.buf)

// Flush the buffer into the gzip response.
n, err := w.gw.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
return err
}
return nil
}

// startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
func (w *GzipResponseWriter) startPlain() error {
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
}
w.ignore = true
// If Write was never called then don't call Write on the underlying ResponseWriter.
if w.buf == nil {
return nil
}
n, err := w.ResponseWriter.Write(w.buf)
w.buf = nil
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
return io.ErrShortWrite
err = io.ErrShortWrite
}

w.buf = nil
return err
}

Expand All @@ -177,19 +220,18 @@ func (w *GzipResponseWriter) init() {

// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
func (w *GzipResponseWriter) Close() error {
if w.ignore {
return nil
}

if w.gw == nil {
// Gzip not trigged yet, write out regular response.
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
}
if w.buf != nil {
_, writeErr := w.ResponseWriter.Write(w.buf)
// Returns the error if any at write.
if writeErr != nil {
return fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", writeErr.Error())
}
// GZIP not triggered yet, write out regular response.
err := w.startPlain()
// Returns the error if any at write.
if err != nil {
err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
}
return nil
return err
}

err := w.gw.Close()
Expand Down
107 changes: 71 additions & 36 deletions gzip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ func TestParseEncodings(t *testing.T) {
examples := map[string]codings{

// Examples from RFC 2616
"compress, gzip": {"compress": 1.0, "gzip": 1.0},
"": {},
"*": {"*": 1.0},
"compress, gzip": {"compress": 1.0, "gzip": 1.0},
"": {},
"*": {"*": 1.0},
"compress;q=0.5, gzip;q=1.0": {"compress": 0.5, "gzip": 1.0},
"gzip;q=1.0, identity; q=0.5, *;q=0": {"gzip": 1.0, "identity": 0.5, "*": 0.0},

Expand Down Expand Up @@ -158,18 +158,23 @@ func TestGzipHandlerNoBody(t *testing.T) {
tests := []struct {
statusCode int
contentEncoding string
bodyLen int
emptyBody bool
body []byte
}{
// Body must be empty.
{http.StatusNoContent, "", 0},
{http.StatusNotModified, "", 0},
{http.StatusNoContent, "", true, nil},
{http.StatusNotModified, "", true, nil},
// Body is going to get gzip'd no matter what.
{http.StatusOK, "", 0},
{http.StatusOK, "", true, []byte{}},
{http.StatusOK, "gzip", false, []byte(testBody)},
}

for num, test := range tests {
handler := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(test.statusCode)
if test.body != nil {
w.Write(test.body)
}
}))

rec := httptest.NewRecorder()
Expand All @@ -194,16 +199,31 @@ func TestGzipHandlerNoBody(t *testing.T) {
header := rec.Header()
assert.Equal(t, test.contentEncoding, header.Get("Content-Encoding"), fmt.Sprintf("for test iteration %d", num))
assert.Equal(t, "Accept-Encoding", header.Get("Vary"), fmt.Sprintf("for test iteration %d", num))
assert.Equal(t, test.bodyLen, len(body), fmt.Sprintf("for test iteration %d", num))
if test.emptyBody {
assert.Empty(t, body, fmt.Sprintf("for test iteration %d", num))
} else {
assert.NotEmpty(t, body, fmt.Sprintf("for test iteration %d", num))
assert.NotEqual(t, test.body, body, fmt.Sprintf("for test iteration %d", num))
}
}
}

func TestGzipHandlerContentLength(t *testing.T) {
b := []byte(testBody)
handler := GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Length", strconv.Itoa(len(b)))
w.Write(b)
}))
testBodyBytes := []byte(testBody)
tests := []struct {
bodyLen int
bodies [][]byte
emptyBody bool
}{
{len(testBody), [][]byte{testBodyBytes}, false},
// each of these writes is less than the DefaultMinSize
{len(testBody), [][]byte{testBodyBytes[:200], testBodyBytes[200:]}, false},
// without a defined Content-Length it should still gzip
{0, [][]byte{testBodyBytes[:200], testBodyBytes[200:]}, false},
// simulate a HEAD request with an empty write (to populate headers)
{len(testBody), [][]byte{nil}, true},
}

// httptest.NewRecorder doesn't give you access to the Content-Length
// header so instead, we create a server on a random port and make
// a request to that instead
Expand All @@ -213,35 +233,50 @@ func TestGzipHandlerContentLength(t *testing.T) {
}
defer ln.Close()
srv := &http.Server{
Handler: handler,
Handler: nil,
}
go srv.Serve(ln)

req := &http.Request{
Method: "GET",
URL: &url.URL{Path: "/", Scheme: "http", Host: ln.Addr().String()},
Header: make(http.Header),
Close: true,
}
req.Header.Set("Accept-Encoding", "gzip")
res, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("Unexpected error making http request: %v", err)
}
defer res.Body.Close()
for num, test := range tests {
srv.Handler = GzipHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if test.bodyLen > 0 {
w.Header().Set("Content-Length", strconv.Itoa(test.bodyLen))
}
for _, b := range test.bodies {
w.Write(b)
}
}))
req := &http.Request{
Method: "GET",
URL: &url.URL{Path: "/", Scheme: "http", Host: ln.Addr().String()},
Header: make(http.Header),
Close: true,
}
req.Header.Set("Accept-Encoding", "gzip")
res, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("Unexpected error making http request in test iteration %d: %v", num, err)
}
defer res.Body.Close()

body, err := ioutil.ReadAll(res.Body)
if err != nil {
t.Fatalf("Unexpected error reading response body: %v", err)
}
body, err := ioutil.ReadAll(res.Body)
if err != nil {
t.Fatalf("Unexpected error reading response body in test iteration %d: %v", num, err)
}

l, err := strconv.Atoi(res.Header.Get("Content-Length"))
if err != nil {
t.Fatalf("Unexpected error parsing Content-Length: %v", err)
l, err := strconv.Atoi(res.Header.Get("Content-Length"))
if err != nil {
t.Fatalf("Unexpected error parsing Content-Length in test iteration %d: %v", num, err)
}
if test.emptyBody {
assert.Empty(t, body, fmt.Sprintf("for test iteration %d", num))
assert.Equal(t, 0, l, fmt.Sprintf("for test iteration %d", num))
} else {
assert.Len(t, body, l, fmt.Sprintf("for test iteration %d", num))
}
assert.Equal(t, "gzip", res.Header.Get("Content-Encoding"), fmt.Sprintf("for test iteration %d", num))
assert.NotEqual(t, test.bodyLen, l, fmt.Sprintf("for test iteration %d", num))
}
assert.Len(t, body, l)
assert.Equal(t, "gzip", res.Header.Get("Content-Encoding"))
assert.NotEqual(t, b, body)
}

func TestGzipHandlerMinSizeMustBePositive(t *testing.T) {
Expand Down

0 comments on commit c551b6c

Please sign in to comment.