Skip to content

Commit

Permalink
[fix] use latest htmlquery - closes gocolly#280
Browse files: browse the repository at this point in the history
  • Loading branch information
asciimoo committed Feb 4, 2019
1 parent 85daab4 commit c64a584
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
5 changes: 2 additions & 3 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
"sync/atomic"
"time"

-"golang.org/x/net/html"
"google.golang.org/appengine/urlfetch"

"github.com/PuerkitoBio/goquery"
Expand Down Expand Up @@ -988,7 +987,7 @@ func (c *Collector) handleOnXML(resp *Response) error {
}

for _, cc := range c.xmlCallbacks {
-htmlquery.FindEach(doc, cc.Query, func(i int, n *html.Node) {
+for _, n := range htmlquery.Find(doc, cc.Query) {
e := NewXMLElementFromHTMLNode(resp, n)
if c.debugger != nil {
c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, map[string]string{
Expand All @@ -997,7 +996,7 @@ func (c *Collector) handleOnXML(resp *Response) error {
}))
}
cc.Function(e)
-})
+}
}
} else if strings.Contains(contentType, "xml") {
doc, err := xmlquery.Parse(bytes.NewBuffer(resp.Body))
Expand Down
8 changes: 4 additions & 4 deletions xmlelement.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,13 @@ func (h *XMLElement) ChildAttr(xpathQuery, attrName string) string {
func (h *XMLElement) ChildAttrs(xpathQuery, attrName string) []string {
var res []string
if h.isHTML {
-htmlquery.FindEach(h.DOM.(*html.Node), xpathQuery, func(i int, child *html.Node) {
+for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) {
for _, attr := range child.Attr {
if attr.Key == attrName {
res = append(res, strings.TrimSpace(attr.Val))
}
}
-})
+}
} else {
xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) {
for _, attr := range child.Attr {
Expand All @@ -158,9 +158,9 @@ func (h *XMLElement) ChildAttrs(xpathQuery, attrName string) []string {
func (h *XMLElement) ChildTexts(xpathQuery string) []string {
texts := make([]string, 0)
if h.isHTML {
-htmlquery.FindEach(h.DOM.(*html.Node), xpathQuery, func(i int, child *html.Node) {
+for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) {
texts = append(texts, strings.TrimSpace(htmlquery.InnerText(child)))
-})
+}
} else {
xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) {
texts = append(texts, strings.TrimSpace(child.InnerText()))
Expand Down

0 comments on commit c64a584

Please sign in to comment.