Skip to content

Commit

Permalink
Merge pull request gocolly#171 from mumugoah/master
Browse files Browse the repository at this point in the history
Read gzipped responses and include headers when marshaling requests
  • Loading branch information
asciimoo authored Jun 27, 2018
2 parents e7ff7f4 + f740ece commit 2e03a39
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 11 deletions.
2 changes: 1 addition & 1 deletion colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ func (c *Collector) UnmarshalRequest(r []byte) (*Request, error) {
Body: bytes.NewReader(req.Body),
Ctx: ctx,
ID: atomic.AddUint32(&c.requestCount, 1),
Headers: &http.Header{},
Headers: &req.Headers,
collector: c,
}, nil
}
Expand Down
8 changes: 8 additions & 0 deletions http_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
"sync"
"time"

"compress/gzip"

"github.com/gobwas/glob"
)

Expand Down Expand Up @@ -187,6 +189,12 @@ func (h *httpBackend) Do(request *http.Request, bodySize int) (*Response, error)
if bodySize > 0 {
bodyReader = io.LimitReader(bodyReader, int64(bodySize))
}
if res.Uncompressed && res.Header.Get("Content-Encoding") == "gzip" {
bodyReader, err = gzip.NewReader(bodyReader)
if err != nil {
return nil, err
}
}
body, err := ioutil.ReadAll(bodyReader)
defer res.Body.Close()
if err != nil {
Expand Down
22 changes: 12 additions & 10 deletions request.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ type Request struct {
}

type serializableRequest struct {
URL string
Method string
Body []byte
ID uint32
Ctx map[string]interface{}
URL string
Method string
Body []byte
ID uint32
Ctx map[string]interface{}
Headers http.Header
}

// New creates a new request with the context of the original request
Expand Down Expand Up @@ -164,10 +165,11 @@ func (r *Request) Marshal() ([]byte, error) {
}
}
return json.Marshal(&serializableRequest{
URL: r.URL.String(),
Method: r.Method,
Body: body,
ID: r.ID,
Ctx: ctx,
URL: r.URL.String(),
Method: r.Method,
Body: body,
ID: r.ID,
Ctx: ctx,
Headers: *r.Headers,
})
}

0 comments on commit 2e03a39

Please sign in to comment.