Skip to content

Commit

Permalink
net/http: support gzip, x-gzip Transfer-Encodings
Browse files Browse the repository at this point in the history
Support "gzip" aka "x-gzip" as a transfer-encoding for
requests and responses as per RFC 7230 Section 3.3.1.

"gzip" and "x-gzip" are equivalents as requested by
RFC 7230 Section 4.2.3.

Transfer-Encoding is an on-the-fly property of the body
that can be applied by proxies, other servers and basically
any intermediary to transport the content e.g. across data centers
or backends/machine to machine that need compression.

For this change, "gzip" is both explicitly and implicitly combined
with transfer-encoding "chunked" in an ordering such as:

    Transfer-Encoding: gzip, chunked

and NOT

    Transfer-Encoding: chunked, gzip

Obviously the latter form is counter-intuitive for streaming.
Thus "chunked" is the last value to appear in that transfer-encoding header,
if explicitly included.

When parsing the response, the chunked body is concatenated as "chunked" does,
before finally being decompressed as "gzip".

A chunked and compressed body would typically look like this:

<LENGTH_1>\r\n<CHUNK_1_GZIPPED_BODY>\r\n<LENGTH_2>\r\n<CHUNK_2_GZIPPED_BODY>\r\n0\r\n\r\n

which, when being processed, we would concatenate

    <FULL_BODY>  := <CHUNK_1_GZIPPED_BODY> + <CHUNK_2_GZIPPED_BODY> + ...

and then finally gunzip it
    <FINAL_BODY> := gunzip(<FULL_BODY>)

If a "chunked" transfer-encoding is NOT applied but "gzip" is applied,
we implicitly assume that they requested using "chunked" at the end.
This is as per the recommendation of RFC 7230 Section 3.3.1, which explicitly says
that for:

* Request:
"  If any transfer coding
   other than chunked is applied to a request payload body, the sender
   MUST apply chunked as the final transfer coding to ensure that the
   message is properly framed."

* Response:
"  If any transfer coding other than
   chunked is applied to a response payload body, the sender MUST either
   apply chunked as the final transfer coding or terminate the message
   by closing the connection."

RELNOTE=yes

Fixes golang#29162

Change-Id: Icb8b8b838cf4119705605b29725cabb1fe258491
Reviewed-on: https://go-review.googlesource.com/c/go/+/166517
Run-TryBot: Emmanuel Odeke <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
  • Loading branch information
odeke-em committed Nov 8, 2019
1 parent b2b0992 commit e6c12c3
Show file tree
Hide file tree
Showing 2 changed files with 394 additions and 15 deletions.
125 changes: 112 additions & 13 deletions src/net/http/transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package http
import (
"bufio"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -466,6 +467,34 @@ func suppressedHeaders(status int) []string {
return nil
}

// proxyingReadCloser is a composite type that pairs an independent
// io.Reader with an independent io.Closer: Read calls are forwarded to
// the embedded Reader and Close calls to the embedded Closer.
//
// It is composed of:
// a) a top-level reader, e.g. the result of decompression
// b) a Closer that releases everything behind that reader, e.g. a
// multiCloser covering both the decompressor and the original body.
type proxyingReadCloser struct {
io.Reader
io.Closer
}

// multiCloser is a list of io.Closer values that are closed together
// through a single Close call.
// It is useful alongside proxyingReadCloser when a body is decompressed
// on the fly and both the *gzip.Reader and the underlying body must be
// closed.
type multiCloser []io.Closer

// Close closes every element in order, continuing past failures, and
// returns the first non-nil error encountered (or nil if all succeeded).
func (mc multiCloser) Close() error {
	var firstErr error
	for i := range mc {
		closeErr := mc[i].Close()
		if firstErr == nil {
			// Only the earliest failure is reported; later ones are dropped.
			firstErr = closeErr
		}
	}
	return firstErr
}

// msg is *Request or *Response.
func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
t := &transferReader{RequestMethod: "GET"}
Expand Down Expand Up @@ -543,7 +572,7 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
// Prepare body reader. ContentLength < 0 means chunked encoding
// or close connection when finished, since multipart is not supported yet
switch {
case chunked(t.TransferEncoding):
case chunked(t.TransferEncoding) || implicitlyChunked(t.TransferEncoding):
if noResponseBodyExpected(t.RequestMethod) || !bodyAllowedForStatus(t.StatusCode) {
t.Body = NoBody
} else {
Expand All @@ -564,6 +593,21 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
}
}

// Finally if "gzip" was one of the requested transfer-encodings,
// we'll unzip the concatenated body/payload of the request.
// TODO: As we support more transfer-encodings, extract
// this code and apply the un-codings in reverse.
if t.Body != NoBody && gzipped(t.TransferEncoding) {
zr, err := gzip.NewReader(t.Body)
if err != nil {
return fmt.Errorf("http: failed to gunzip body: %v", err)
}
t.Body = &proxyingReadCloser{
Reader: zr,
Closer: multiCloser{zr, t.Body},
}
}

// Unify output
switch rr := msg.(type) {
case *Request:
Expand All @@ -583,8 +627,41 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
return nil
}

// Checks whether chunked is the first entry of the encodings stack.
func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" }
// chunked reports whether "chunked" is the final entry of the
// transfer-encodings stack te. An empty stack is never chunked.
func chunked(te []string) bool {
	if n := len(te); n > 0 {
		return te[n-1] == "chunked"
	}
	return false
}

// implicitlyChunked reports whether te should be treated as if "chunked"
// were its final coding: te must be non-empty and must not contain an
// "identity" entry.
//
// RFC 7230 Section 3.3.1 says that a sender applying any transfer coding
// other than chunked MUST apply chunked as the final transfer coding so
// that the message is properly framed. "identity" is incompatible with
// every other transformational encoding, so its presence makes
// implicitlyChunked return false.
func implicitlyChunked(te []string) bool {
	for i := 0; i < len(te); i++ {
		if te[i] == "identity" {
			return false
		}
	}
	// With no transfer-encodings at all there is nothing to imply chunking.
	return len(te) > 0
}

// isGzipTransferEncoding reports whether tei names the gzip transfer
// coding. RFC 7230 Section 4.2.3 asks that "x-gzip" be treated as
// equivalent to "gzip", so both spellings match. The comparison is exact;
// tei is not trimmed or case-folded here.
func isGzipTransferEncoding(tei string) bool {
	switch tei {
	case "gzip", "x-gzip":
		return true
	}
	return false
}

// gzipped reports whether any entry of the transfer-encodings stack te
// is "gzip" or its equivalent "x-gzip".
func gzipped(te []string) bool {
	for i := 0; i < len(te); i++ {
		if !isGzipTransferEncoding(te[i]) {
			continue
		}
		return true
	}
	return false
}

// isIdentity reports whether te consists of exactly one entry and that
// entry is "identity".
func isIdentity(te []string) bool {
	if len(te) != 1 {
		return false
	}
	return te[0] == "identity"
}
Expand Down Expand Up @@ -620,25 +697,47 @@ func (t *transferReader) fixTransferEncoding() error {

encodings := strings.Split(raw[0], ",")
te := make([]string, 0, len(encodings))
// TODO: Even though we only support "identity" and "chunked"
// encodings, the loop below is designed with foresight. One
// invariant that must be maintained is that, if present,
// chunked encoding must always come first.
for _, encoding := range encodings {

// When adding new encodings, please maintain the invariant:
// if chunked encoding is present, it must always
// come last and it must be applied only once.
// See RFC 7230 Section 3.3.1 Transfer-Encoding.
for i, encoding := range encodings {
encoding = strings.ToLower(strings.TrimSpace(encoding))
// "identity" encoding is not recorded

if encoding == "identity" {
// "identity" should not be mixed with other transfer-encodings/compressions
// because it means "no compression, no transformation".
if len(encodings) != 1 {
return &badStringError{`"identity" when present must be the only transfer encoding`, strings.Join(encodings, ",")}
}
// "identity" is not recorded.
break
}
if encoding != "chunked" {

switch {
case encoding == "chunked":
// "chunked" MUST ALWAYS be the last
// encoding as per the loop invariant.
// That is:
// Invalid: [chunked, gzip]
// Valid: [gzip, chunked]
if i+1 != len(encodings) {
return &badStringError{"chunked must be applied only once, as the last encoding", strings.Join(encodings, ",")}
}
// Supported otherwise.

case isGzipTransferEncoding(encoding):
// Supported

default:
return &unsupportedTEError{fmt.Sprintf("unsupported transfer encoding: %q", encoding)}
}

te = te[0 : len(te)+1]
te[len(te)-1] = encoding
}
if len(te) > 1 {
return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
}

if len(te) > 0 {
// RFC 7230 3.3.2 says "A sender MUST NOT send a
// Content-Length header field in any message that
Expand Down
Loading

0 comments on commit e6c12c3

Please sign in to comment.