Skip to content

Commit

Permalink
Add Random Delay for Limit Rule
Browse files Browse the repository at this point in the history
Description:

    Added a RandomDelay option to LimitRule that lets the user specify an additional parameter to
        randomize the delay applied to each request.

    Added an example for random delay

Testing Methodology:

    Used the rate_limit app in the _examples dir and verified that adding RandomDelay causes
        each request to have a different delay time.
  • Loading branch information
jlr52 committed Dec 19, 2017
1 parent 685e171 commit 3e8951f
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
36 changes: 36 additions & 0 deletions _examples/random_delay/random_delay.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package main

import (
"fmt"
"time"

"github.com/gocolly/colly"
"github.com/gocolly/colly/debug"
)

// main demonstrates LimitRule.RandomDelay: it fires several requests at
// httpbin.org concurrently while a limit rule caps parallelism and adds a
// randomized extra delay between requests to the matching domain.
func main() {
	url := "https://httpbin.org/delay/2"

	// Number of additional visits started in their own goroutines.
	const workers = 4

	// Instantiate default collector
	c := colly.NewCollector()

	// Attach a debugger to the collector
	c.SetDebugger(&debug.LogDebugger{})

	// For domains matching the "*httpbin.*" glob, allow at most two
	// concurrent requests and add a random extra delay of up to five
	// seconds, so each request waits a different amount of time.
	c.Limit(&colly.LimitRule{
		DomainGlob:  "*httpbin.*",
		Parallelism: 2,
		RandomDelay: 5 * time.Second,
	})

	// Start scraping in four goroutines on https://httpbin.org/delay/2.
	// Each goroutine reports its result on a buffered channel so that main
	// cannot reach Wait (and exit) before every Visit has been issued, and
	// so that failures are not silently dropped.
	results := make(chan error, workers)
	for i := 0; i < workers; i++ {
		go func(n int) {
			results <- c.Visit(fmt.Sprintf("%s?n=%d", url, n))
		}(i)
	}

	// Start scraping on https://httpbin.org/delay/2 from the main goroutine
	if err := c.Visit(url); err != nil {
		fmt.Println("visit failed:", err)
	}

	// Collect the outcome of every background visit
	for i := 0; i < workers; i++ {
		if err := <-results; err != nil {
			fmt.Println("visit failed:", err)
		}
	}

	// Wait until the collector has finished all outstanding work
	c.Wait()
}
10 changes: 9 additions & 1 deletion http_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"errors"
"io"
"io/ioutil"
"math/rand"
"net/http"
"net/http/cookiejar"
"os"
Expand Down Expand Up @@ -37,6 +38,8 @@ type LimitRule struct {
DomainGlob string
// Delay is the duration to wait before creating a new request to the matching domains
Delay time.Duration
// RandomDelay is the extra randomized duration to wait added to Delay before creating a new request
RandomDelay time.Duration
// Parallelism is the number of the maximum allowed concurrent requests of the matching domains
Parallelism int
waitChan chan bool
Expand Down Expand Up @@ -148,7 +151,12 @@ func (h *httpBackend) Do(request *http.Request, bodySize int) (*Response, error)
if r != nil {
r.waitChan <- true
defer func(r *LimitRule) {
time.Sleep(r.Delay)
randomDelay := time.Duration(0)
rand.Seed(time.Now().UnixNano())
if r.RandomDelay != 0 {
randomDelay = time.Duration(rand.Intn(int(r.RandomDelay)))
}
time.Sleep(r.Delay + randomDelay)
<-r.waitChan
}(r)
}
Expand Down

0 comments on commit 3e8951f

Please sign in to comment.