Skip to content

Commit

Permalink
Fix default User-Agent when using custom headers
Browse files Browse the repository at this point in the history
When a non-nil hdr parameter was passed to the c.Request method without
the User-Agent header being set explicitly, the header was left unset (when
it should have been set to c.UserAgent). As a result, the Go HTTP client fell
back to its default "Go-http-client/2.0" string.

Suppressing the header altogether is still possible by explicitly setting
the User-Agent header to an empty string.
  • Loading branch information
WGH- committed Mar 10, 2021
1 parent e4729d0 commit 84585f9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
5 changes: 4 additions & 1 deletion colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,10 @@ func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, c
}

if hdr == nil {
hdr = http.Header{"User-Agent": []string{c.UserAgent}}
hdr = http.Header{}
}
if _, ok := hdr["User-Agent"]; !ok {
hdr.Set("User-Agent", c.UserAgent)
}
rc, ok := requestData.(io.ReadCloser)
if !ok && requestData != nil {
Expand Down
11 changes: 11 additions & 0 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,17 @@ func TestUserAgent(t *testing.T) {
t.Errorf("mismatched User-Agent (nil hdr): got=%q want=%q", got, want)
}
}()
func() {
c := NewCollector(UserAgent(exampleUserAgent1))
c.OnResponse(func(resp *Response) {
receivedUserAgent = string(resp.Body)
})

c.Request("GET", ts.URL+"/user_agent", nil, nil, http.Header{})
if got, want := receivedUserAgent, exampleUserAgent1; got != want {
t.Errorf("mismatched User-Agent (non-nil hdr): got=%q want=%q", got, want)
}
}()
func() {
c := NewCollector(UserAgent(exampleUserAgent1))
c.OnResponse(func(resp *Response) {
Expand Down

0 comments on commit 84585f9

Please sign in to comment.