net/http: support gzip, x-gzip Transfer-Encodings

Support "gzip" aka "x-gzip" as a transfer-encoding for requests and responses as per RFC 7230 Section 3.3.1. "gzip" and "x-gzip" are equivalent, as specified by RFC 7230 Section 4.2.3. Transfer-Encoding is an on-the-fly property of the body that can be applied by proxies, other servers and basically any intermediary to transport the content, e.g. across data centers or between backends (machine to machine) that need compression. For this change, "gzip" is both explicitly and implicitly combined with transfer-encoding "chunked" in an ordering such as: "Transfer-Encoding: gzip, chunked" and NOT "Transfer-Encoding: chunked, gzip". Obviously the latter form is counter-intuitive for streaming. Thus "chunked" is the last value to appear in that transfer-encoding header, if explicitly included. When parsing the response, the chunks of the body are first concatenated, as "chunked" decoding does, before finally being decompressed as "gzip". A chunked and compressed body would typically look like this: <LENGTH_1>\r\n<CHUNK_1_GZIPPED_BODY>\r\n<LENGTH_2>\r\n<CHUNK_2_GZIPPED_BODY>\r\n0\r\n\r\n which, when being processed, we would concatenate: <FULL_BODY> := <CHUNK_1_GZIPPED_BODY> + <CHUNK_2_GZIPPED_BODY> + ... and then finally gunzip it: <FINAL_BODY> := gunzip(<FULL_BODY>). If a "chunked" transfer-encoding is NOT applied but "gzip" is applied, we implicitly assume that they requested using "chunked" at the end. This is as per the recommendation of RFC 7230 Section 3.3.1, which explicitly says that for: * Request: "If any transfer coding other than chunked is applied to a request payload body, the sender MUST apply chunked as the final transfer coding to ensure that the message is properly framed." * Response: "If any transfer coding other than chunked is applied to a response payload body, the sender MUST either apply chunked as the final transfer coding or terminate the message by closing the connection." 
RELNOTE=yes Fixes golang#29162 Change-Id: Icb8b8b838cf4119705605b29725cabb1fe258491 Reviewed-on: https://go-review.googlesource.com/c/go/+/166517 Run-TryBot: Emmanuel Odeke <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Brad Fitzpatrick <[email protected]>
open-fork · Nov 8, 2019 · e6c12c3 · e6c12c3
1 parent b2b0992
commit e6c12c3
Show file tree

Hide file tree

Showing 2 changed files with 394 additions and 15 deletions.
diff --git a/src/net/http/transfer.go b/src/net/http/transfer.go
@@ -7,6 +7,7 @@ package http
 import (
 	"bufio"
 	"bytes"
+	"compress/gzip"
 	"errors"
 	"fmt"
 	"io"
@@ -466,6 +467,34 @@ func suppressedHeaders(status int) []string {
 	return nil
 }
 
+// proxyingReadCloser is a composite type that accepts and proxies
+// Read and Close calls to its respective Reader and Closer.
+//
+// It is composed of:
+// a) a top-level reader e.g. the result of decompression
+// b) a symbolic Closer e.g. the result of decompression, the
+//    original body and the connection itself.
+type proxyingReadCloser struct {
+	io.Reader
+	io.Closer
+}
+
+// multiCloser implements io.Closer and allows a bunch of io.Closer values
+// to all be closed with a single Close call, which returns the first error.
+// Example usage is with proxyingReadCloser if we are decompressing a response
+// body on the fly and would like to close both *gzip.Reader and underlying body.
+type multiCloser []io.Closer
+
+func (mc multiCloser) Close() error {
+	var err error
+	for _, c := range mc {
+		if err1 := c.Close(); err1 != nil && err == nil {
+			err = err1
+		}
+	}
+	return err
+}
+
 // msg is *Request or *Response.
 func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
 	t := &transferReader{RequestMethod: "GET"}
@@ -543,7 +572,7 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
 	// Prepare body reader. ContentLength < 0 means chunked encoding
 	// or close connection when finished, since multipart is not supported yet
 	switch {
-	case chunked(t.TransferEncoding):
+	case chunked(t.TransferEncoding) || implicitlyChunked(t.TransferEncoding):
 		if noResponseBodyExpected(t.RequestMethod) || !bodyAllowedForStatus(t.StatusCode) {
 			t.Body = NoBody
 		} else {
@@ -564,6 +593,21 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
 		}
 	}
 
+	// Finally, if "gzip" was one of the requested transfer-encodings,
+	// we'll gunzip the concatenated body/payload of the message.
+	// TODO: As we support more transfer-encodings, extract
+	// this code and apply the un-codings in reverse.
+	if t.Body != NoBody && gzipped(t.TransferEncoding) {
+		zr, err := gzip.NewReader(t.Body)
+		if err != nil {
+			return fmt.Errorf("http: failed to gunzip body: %v", err)
+		}
+		t.Body = &proxyingReadCloser{
+			Reader: zr,
+			Closer: multiCloser{zr, t.Body},
+		}
+	}
+
 	// Unify output
 	switch rr := msg.(type) {
 	case *Request:
@@ -583,8 +627,41 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
 	return nil
 }
 
-// Checks whether chunked is part of the encodings stack
-func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" }
+// Checks whether chunked is the last part of the encodings stack
+func chunked(te []string) bool { return len(te) > 0 && te[len(te)-1] == "chunked" }
+
+// implicitlyChunked is a helper to check whether chunked is implied, because
+// RFC 7230 Section 3.3.1 says that the sender MUST apply chunked as the final
+// transfer coding to ensure that the message is properly framed, for both the
+// request and the response. Since "identity" is incompatible with any other
+// transformational encoding and cannot co-exist with one, the presence of
+// "identity" will cause implicitlyChunked to return false.
+func implicitlyChunked(te []string) bool {
+	if len(te) == 0 { // No transfer-encodings passed in, so not implicitly chunked.
+		return false
+	}
+	for _, tei := range te {
+		if tei == "identity" {
+			return false
+		}
+	}
+	return true
+}
+
+func isGzipTransferEncoding(tei string) bool {
+	// RFC 7230 Section 4.2.3 says that "x-gzip" SHOULD be considered equivalent to "gzip".
+	return tei == "gzip" || tei == "x-gzip"
+}
+
+// Checks whether either of "gzip" or "x-gzip" is contained in the transfer encodings.
+func gzipped(te []string) bool {
+	for _, tei := range te {
+		if isGzipTransferEncoding(tei) {
+			return true
+		}
+	}
+	return false
+}
 
 // Checks whether the encoding is explicitly "identity".
 func isIdentity(te []string) bool { return len(te) == 1 && te[0] == "identity" }
@@ -620,25 +697,47 @@ func (t *transferReader) fixTransferEncoding() error {
 
 	encodings := strings.Split(raw[0], ",")
 	te := make([]string, 0, len(encodings))
-	// TODO: Even though we only support "identity" and "chunked"
-	// encodings, the loop below is designed with foresight. One
-	// invariant that must be maintained is that, if present,
-	// chunked encoding must always come first.
-	for _, encoding := range encodings {
+
+	// When adding new encodings, please maintain the invariant:
+	//   if chunked encoding is present, it must always
+	//   come last and it must be applied only once.
+	// See RFC 7230 Section 3.3.1 Transfer-Encoding.
+	for i, encoding := range encodings {
 		encoding = strings.ToLower(strings.TrimSpace(encoding))
-		// "identity" encoding is not recorded
+
 		if encoding == "identity" {
+			// "identity" should not be mixed with other transfer-encodings/compressions
+			// because it means "no compression, no transformation".
+			if len(encodings) != 1 {
+				return &badStringError{`"identity" when present must be the only transfer encoding`, strings.Join(encodings, ",")}
+			}
+			// "identity" is not recorded.
 			break
 		}
-		if encoding != "chunked" {
+
+		switch {
+		case encoding == "chunked":
+			// "chunked" MUST ALWAYS be the last
+			// encoding as per the loop invariant.
+			// That is:
+			//     Invalid: [chunked, gzip]
+			//     Valid:   [gzip, chunked]
+			if i+1 != len(encodings) {
+				return &badStringError{"chunked must be applied only once, as the last encoding", strings.Join(encodings, ",")}
+			}
+			// Supported otherwise.
+
+		case isGzipTransferEncoding(encoding):
+			// Supported
+
+		default:
 			return &unsupportedTEError{fmt.Sprintf("unsupported transfer encoding: %q", encoding)}
 		}
+
 		te = te[0 : len(te)+1]
 		te[len(te)-1] = encoding
 	}
-	if len(te) > 1 {
-		return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
-	}
+
 	if len(te) > 0 {
 		// RFC 7230 3.3.2 says "A sender MUST NOT send a
 		// Content-Length header field in any message that