Skip to content

Commit

Permalink
Merge pull request gocolly#113 from sharmi/master
Browse files Browse the repository at this point in the history
Extending XmlElement with ChildTexts and handling edge cases in ChildText
  • Loading branch information
asciimoo authored Jul 10, 2018
2 parents f9e277f + d018346 commit 10350f9
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
31 changes: 27 additions & 4 deletions xmlelement.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,21 @@ func (h *XMLElement) Attr(k string) string {
// elements.
func (h *XMLElement) ChildText(xpathQuery string) string {
if h.isHTML {
return strings.TrimSpace(htmlquery.InnerText(htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery)))
child := htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery)
if child == nil {
return ""
}
return strings.TrimSpace(htmlquery.InnerText(child))
}
n := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery)
if n == nil {
child := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery)
if child == nil {
return ""
}
return strings.TrimSpace(n.InnerText())
return strings.TrimSpace(child.InnerText())

}


// ChildAttr returns the stripped text content of the first matching
// element's attribute.
func (h *XMLElement) ChildAttr(xpathQuery, attrName string) string {
Expand Down Expand Up @@ -147,3 +153,20 @@ func (h *XMLElement) ChildAttrs(xpathQuery, attrName string) []string {
}
return res
}

// ChildTexts evaluates xpathQuery against this element's DOM node and
// collects the text of every element the query visits. Each entry is the
// element's inner text with leading and trailing whitespace removed. The
// result starts out as an empty, non-nil slice, so callers always get a
// usable slice back.
func (h *XMLElement) ChildTexts(xpathQuery string) []string {
	result := []string{}
	if !h.isHTML {
		// XML documents are backed by xmlquery nodes.
		root := h.DOM.(*xmlquery.Node)
		xmlquery.FindEach(root, xpathQuery, func(_ int, match *xmlquery.Node) {
			result = append(result, strings.TrimSpace(match.InnerText()))
		})
		return result
	}
	// HTML documents are backed by golang.org/x/net/html nodes.
	root := h.DOM.(*html.Node)
	htmlquery.FindEach(root, xpathQuery, func(_ int, match *html.Node) {
		result = append(result, strings.TrimSpace(htmlquery.InnerText(match)))
	})
	return result
}

23 changes: 20 additions & 3 deletions xmlelement_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

package colly_test

import (
"strings"
"testing"

import (
"github.com/antchfx/htmlquery"
"github.com/gocolly/colly"
"reflect"
"strings"
"testing"
)

// Borrowed from http://infohost.nmt.edu/tcc/help/pubs/xhtml/example.html
Expand Down Expand Up @@ -72,8 +73,24 @@ func TestChildText(t *testing.T) {
if text := xmlElem.ChildText("//p"); text != "This is a regular text paragraph." {
t.Fatalf("failed child tag test: %v != This is a regular text paragraph.", text)
}
if text := xmlElem.ChildText("//dl"); text != "" {
t.Fatalf("failed child tag test: %v != \"\"", text)
}
}

// TestChildTexts checks that ChildTexts returns the trimmed text of every
// element matching an XPath query, and an empty (non-nil) slice when the
// query matches nothing.
func TestChildTexts(t *testing.T) {
	resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
	doc, err := htmlquery.Parse(strings.NewReader(htmlPage))
	if err != nil {
		// Without a parsed document every later assertion would be meaningless.
		t.Fatalf("parsing htmlPage: %v", err)
	}
	xmlNode := htmlquery.FindOne(doc, "/html")
	xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)

	// Query with matches: one entry per <li>, in document order.
	expected := []string{"First bullet of a bullet list.", "This is the second bullet."}
	if texts := xmlElem.ChildTexts("//li"); !reflect.DeepEqual(texts, expected) {
		t.Fatalf("failed child tags test: %v != %v", texts, expected)
	}

	// Query without matches: the result must be an empty slice, not nil.
	empty := []string{}
	if texts := xmlElem.ChildTexts("//dl"); !reflect.DeepEqual(texts, empty) {
		t.Fatalf("failed child tags test: %v != %v (want empty non-nil slice)", texts, empty)
	}
}
func TestChildAttr(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
Expand Down

0 comments on commit 10350f9

Please sign in to comment.