[examples] Use functional options for NewCollector
peterhellberg committed Jan 5, 2018
1 parent 09a4706 commit b59af10
Showing 12 changed files with 54 additions and 54 deletions.
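The change throughout is mechanical: settings that used to be assigned to Collector fields after construction are now passed to NewCollector as functional options, i.e. variadic func(*colly.Collector) values applied during construction. A minimal sketch of the pattern — the option names mirror colly's API, but the bodies here are illustrative, not colly's actual source:

package sketch

// Collector holds a few of the settings configured in these examples.
type Collector struct {
	AllowedDomains []string
	MaxDepth       int
	CacheDir       string
}

// NewCollector builds a Collector, then applies each functional option to it.
func NewCollector(options ...func(*Collector)) *Collector {
	c := &Collector{}
	for _, f := range options {
		f(c)
	}
	return c
}

// AllowedDomains returns an option restricting requests to the given domains.
func AllowedDomains(domains ...string) func(*Collector) {
	return func(c *Collector) { c.AllowedDomains = domains }
}

// MaxDepth returns an option limiting how many links deep the collector goes.
func MaxDepth(depth int) func(*Collector) {
	return func(c *Collector) { c.MaxDepth = depth }
}

A bare NewCollector() call still produces the default configuration; callers compose only the options they need, as the diffs below show.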
8 changes: 4 additions & 4 deletions _examples/basic/basic.go
@@ -8,10 +8,10 @@ import (
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
-
-	// Visit only domains: hackerspaces.org, wiki.hackerspaces.org
-	c.AllowedDomains = []string{"hackerspaces.org", "wiki.hackerspaces.org"}
+	c := colly.NewCollector(
+		// Visit only domains: hackerspaces.org, wiki.hackerspaces.org
+		colly.AllowedDomains("hackerspaces.org", "wiki.hackerspaces.org"),
+	)
 
 	// On every a element which has href attribute call callback
 	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
14 changes: 7 additions & 7 deletions _examples/coursera_courses/coursera_courses.go
@@ -25,14 +25,14 @@ type Course struct {
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
+	c := colly.NewCollector(
+		// Visit only domains: coursera.org, www.coursera.org
+		colly.AllowedDomains("coursera.org", "www.coursera.org"),
 
-	// Visit only domains: coursera.org, www.coursera.org
-	c.AllowedDomains = []string{"coursera.org", "www.coursera.org"}
-
-	// Cache responses to prevent multiple download of pages
-	// even if the collector is restarted
-	c.CacheDir = "./coursera_cache"
+		// Cache responses to prevent multiple download of pages
+		// even if the collector is restarted
+		colly.CacheDir("./coursera_cache"),
+	)
 
 	// Create another collector to scrape course details
 	detailCollector := c.Clone()
7 changes: 4 additions & 3 deletions _examples/instagram/instagram.go
@@ -32,9 +32,10 @@ func main() {
 	instagramAccount := os.Args[1]
 	outputDir := fmt.Sprintf("./instagram_%s/", instagramAccount)
 
-	c := colly.NewCollector()
-	c.CacheDir = "./_instagram_cache/"
-	c.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
+	c := colly.NewCollector(
+		colly.CacheDir("./_instagram_cache/"),
+		colly.UserAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"),
+	)
 
 	c.OnHTML("body > script:first-of-type", func(e *colly.HTMLElement) {
 		jsonData := e.Text[strings.Index(e.Text, "{") : len(e.Text)-1]
10 changes: 5 additions & 5 deletions _examples/max_depth/max_depth.go
@@ -8,10 +8,10 @@ import (
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
-
-	// MaxDepth is 1, so only the links on the scraped page
-	// is visited, and no further links are followed
-	c.MaxDepth = 1
+	c := colly.NewCollector(
+		// MaxDepth is 1, so only the links on the scraped page
+		// is visited, and no further links are followed
+		colly.MaxDepth(1),
+	)
 
 	// On every a element which has href attribute call callback
 	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
4 changes: 1 addition & 3 deletions _examples/multipart/multipart.go
@@ -46,9 +46,7 @@ func main() {
 	// Start a single route http server to post an image to.
 	setupServer()
 
-	c := colly.NewCollector()
-	c.AllowURLRevisit = true
-	c.MaxDepth = 5
+	c := colly.NewCollector(colly.AllowURLRevisit(), colly.MaxDepth(5))
 
 	// On every a element which has href attribute call callback
 	c.OnHTML("html", func(e *colly.HTMLElement) {
13 changes: 7 additions & 6 deletions _examples/openedx_courses/openedx_courses.go
@@ -25,13 +25,14 @@ type Course struct {
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
-	// Using IndonesiaX as sample
-	c.AllowedDomains = []string{"indonesiax.co.id", "www.indonesiax.co.id"}
+	c := colly.NewCollector(
+		// Using IndonesiaX as sample
+		colly.AllowedDomains("indonesiax.co.id", "www.indonesiax.co.id"),
 
-	// Cache responses to prevent multiple download of pages
-	// even if the collector is restarted
-	c.CacheDir = "./cache"
+		// Cache responses to prevent multiple download of pages
+		// even if the collector is restarted
+		colly.CacheDir("./cache"),
+	)
 
 	courses := make([]Course, 0, 200)
 
10 changes: 5 additions & 5 deletions _examples/parallel/parallel.go
@@ -8,7 +8,11 @@ import (
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
+	c := colly.NewCollector(
+		// MaxDepth is 2, so only the links on the scraped page
+		// and links on those pages are visited
+		colly.MaxDepth(2),
+	)
 
 	// Limit the maximum parallelism to 5
 	// This is necessary if the goroutines are dynamically
@@ -18,10 +22,6 @@ func main() {
 	// number of go routines.
 	c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 5})
 
-	// MaxDepth is 2, so only the links on the scraped page
-	// and links on those pages are visited
-	c.MaxDepth = 2
-
 	// On every a element which has href attribute call callback
 	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
 		link := e.Attr("href")
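Note what stays outside the constructor in this example: per-domain rate limits and callbacks are still configured as method calls on the built collector. A hedged usage sketch combining both styles — the MaxDepth option and LimitRule fields come from this diff, while the httpbin URL is only a placeholder:

package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

func main() {
	// Construction-time settings are passed as functional options.
	c := colly.NewCollector(colly.MaxDepth(2))

	// Runtime behavior is still configured on the instance afterwards.
	c.Limit(&colly.LimitRule{DomainGlob: "*httpbin.*", Parallelism: 2})
	c.OnRequest(func(r *colly.Request) {
		fmt.Println("visiting", r.URL)
	})

	c.Visit("https://httpbin.org/")
}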
3 changes: 1 addition & 2 deletions _examples/proxy_switcher/proxy_switcher.go
@@ -10,7 +10,7 @@ import (
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
+	c := colly.NewCollector(colly.AllowURLRevisit())
 
 	// Rotate two socks5 proxies
 	rp, err := proxy.RoundRobinProxySwitcher("socks5://127.0.0.1:1337", "socks5://127.0.0.1:1338")
@@ -25,7 +25,6 @@ func main() {
 	})
 
 	// Fetch httpbin.org/ip five times
-	c.AllowURLRevisit = true
 	for i := 0; i < 5; i++ {
 		c.Visit("https://httpbin.org/ip")
 	}
8 changes: 4 additions & 4 deletions _examples/random_delay/random_delay.go
@@ -12,10 +12,10 @@ func main() {
 	url := "https://httpbin.org/delay/2"
 
 	// Instantiate default collector
-	c := colly.NewCollector()
-
-	// Attach a debugger to the collector
-	c.SetDebugger(&debug.LogDebugger{})
+	c := colly.NewCollector(
+		// Attach a debugger to the collector
+		colly.Debugger(&debug.LogDebugger{}),
+	)
 
 	// Limit the number of threads started by colly to two
 	// when visiting links which domains' matches "*httpbin.*" glob
8 changes: 4 additions & 4 deletions _examples/rate_limit/rate_limit.go
@@ -11,10 +11,10 @@ func main() {
 	url := "https://httpbin.org/delay/2"
 
 	// Instantiate default collector
-	c := colly.NewCollector()
-
-	// Attach a debugger to the collector
-	c.SetDebugger(&debug.LogDebugger{})
+	c := colly.NewCollector(
+		// Attach a debugger to the collector
+		colly.Debugger(&debug.LogDebugger{}),
+	)
 
 	// Limit the number of threads started by colly to two
 	// when visiting links which domains' matches "*httpbin.*" glob
14 changes: 7 additions & 7 deletions _examples/url_filter/url_filter.go
@@ -9,13 +9,13 @@ import (
 
 func main() {
 	// Instantiate default collector
-	c := colly.NewCollector()
-
-	// Visit only root url and urls which start with "e" or "h" on httpbin.org
-	c.URLFilters = []*regexp.Regexp{
-		regexp.MustCompile("http://httpbin\\.org/(|e.+)$"),
-		regexp.MustCompile("http://httpbin\\.org/h.+"),
-	}
+	c := colly.NewCollector(
+		// Visit only root url and urls which start with "e" or "h" on httpbin.org
+		colly.URLFilters(
+			regexp.MustCompile("http://httpbin\\.org/(|e.+)$"),
+			regexp.MustCompile("http://httpbin\\.org/h.+"),
+		),
+	)
 
 	// On every a element which has href attribute call callback
 	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
9 changes: 5 additions & 4 deletions _examples/xkcd_store/xkcd_store.go
@@ -20,11 +20,12 @@ func main() {
 	defer writer.Flush()
 	// Write CSV header
 	writer.Write([]string{"Name", "Price", "URL", "Image URL"})
-	// Instantiate default collector
-	c := colly.NewCollector()
 
-	// Allow requests only to store.xkcd.com
-	c.AllowedDomains = []string{"store.xkcd.com"}
+	// Instantiate default collector
+	c := colly.NewCollector(
+		// Allow requests only to store.xkcd.com
+		colly.AllowedDomains("store.xkcd.com"),
+	)
 
 	// Extract product details
 	c.OnHTML(".product-grid-item", func(e *colly.HTMLElement) {
