Skip to content

Commit

Permalink
[fix] xpath tests
Browse files Browse the repository at this point in the history
  • Loading branch information
asciimoo committed Feb 4, 2019
1 parent c64a584 commit ca690f8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
12 changes: 8 additions & 4 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ type Collector struct {
// without explicit charset declaration. This feature uses https://github.com/saintfish/chardet
DetectCharset bool
// RedirectHandler allows control over how a redirect is handled
RedirectHandler func(req *http.Request, via []*http.Request) error
RedirectHandler func(req *http.Request, via []*http.Request) error
// CheckHead performs a HEAD request before every GET to pre-validate the response
CheckHead bool
store storage.Storage
debugger debug.Debugger
robotsMap map[string]*robotstxt.RobotsData
Expand Down Expand Up @@ -402,8 +404,10 @@ func (c *Collector) Appengine(ctx context.Context) {
// request to the URL specified in parameter.
// Visit also calls the previously provided callbacks
func (c *Collector) Visit(URL string) error {
if check := c.scrape(URL, "HEAD", 1, nil, nil, nil, true); check != nil {
return check
if c.CheckHead {
if check := c.scrape(URL, "HEAD", 1, nil, nil, nil, true); check != nil {
return check
}
}
return c.scrape(URL, "GET", 1, nil, nil, nil, true)
}
Expand Down Expand Up @@ -977,7 +981,7 @@ func (c *Collector) handleOnXML(resp *Response) error {
if err != nil {
return err
}
if e := htmlquery.FindOne(doc, "//base/@href"); e != nil {
if e := htmlquery.FindOne(doc, "//base"); e != nil {
for _, a := range e.Attr {
if a.Key == "href" {
resp.Request.baseURL, _ = url.Parse(a.Val)
Expand Down
2 changes: 1 addition & 1 deletion colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ func TestBaseTag(t *testing.T) {
c.Visit(ts.URL + "/base")

c2 := NewCollector()
c2.OnXML("//a/@href", func(e *XMLElement) {
c2.OnXML("//a", func(e *XMLElement) {
u := e.Request.AbsoluteURL(e.Attr("href"))
if u != "http://xy.com/z" {
t.Error("Invalid <base /> tag handling in OnXML: expected https://xy.com/z, got " + u)
Expand Down

0 comments on commit ca690f8

Please sign in to comment.