-
Notifications
You must be signed in to change notification settings - Fork 877
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
2,565 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,5 +2,4 @@ node_modules | |
.vscode | ||
archive | ||
pnpm-lock.yaml | ||
dist | ||
tests | ||
dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/*jshint expr:true */ | ||
'use strict'; | ||
|
||
import Crawler from '../dist/index.js'; | ||
import { expect } from 'chai'; | ||
import nock from 'nock'; | ||
|
||
// State shared by the tests below: the crawler under test, the nock scope, and the URL every test queues. | ||
var c; | ||
var scope; | ||
var httpTarget = 'http://target.com'; | ||
|
||
// Suite exercising a Crawler created with `skipDuplicates: true` against a nock-mocked host. | ||
describe('Cache features tests', function () { | ||
describe('Skip Duplicate active', function () { | ||
// Open a nock scope on the target host before each test. | ||
// NOTE(review): the host literal repeats the value of `httpTarget` instead of reusing it. | ||
beforeEach(function () { | ||
scope = nock('http://target.com'); | ||
}); | ||
// Drop the crawler built by the previous test by replacing the reference with an empty object. | ||
afterEach(function () { | ||
c = {}; | ||
}); | ||
|
||
// A single queued URL must still be requested: the callback expects no error, a 200 status, | ||
// and the mocked GET / interceptor to have been consumed. | ||
it('should not skip one single url', function (done) { | ||
var call = scope.get('/').reply(200); | ||
c = new Crawler({ | ||
jQuery: false, | ||
skipDuplicates: true, | ||
callback: function (error, result) { | ||
expect(error).to.be.null; | ||
expect(result.statusCode).to.equal(200); | ||
expect(call.isDone()).to.be.true; | ||
done(); | ||
}, | ||
}); | ||
|
||
c.queue(httpTarget); | ||
}); | ||
|
||
// The mocked GET / fails with an error and `retries` is 0; the callback must receive the error | ||
// and the interceptor must have been hit. | ||
it('should notify the callback when an error occurs and "retries" is disabled', function (done) { | ||
var koScope = scope.get('/').replyWithError('too bad'); | ||
c = new Crawler({ | ||
jQuery: false, | ||
skipDuplicates: true, | ||
retries: 0, | ||
callback: function (error) { | ||
expect(error).to.exist; | ||
expect(koScope.isDone()).to.be.true; | ||
done(); | ||
}, | ||
}); | ||
|
||
c.queue(httpTarget); | ||
}); | ||
|
||
// Same failure with `retries: 1`; the interceptor is persisted so it can answer more than one | ||
// request (presumably the first attempt plus the retry - confirm against the crawler). | ||
it('should retry and notify the callback when an error occurs and "retries" is enabled', function (done) { | ||
var koScope = scope.get('/').replyWithError('too bad').persist(); | ||
|
||
c = new Crawler({ | ||
jQuery: false, | ||
skipDuplicates: true, | ||
retries: 1, | ||
retryTimeout: 10, | ||
callback: function (error) { | ||
expect(error).to.exist; | ||
expect(koScope.isDone()).to.be.true; | ||
// Switch persistence back off on the scope before finishing the test. | ||
scope.persist(false); | ||
done(); | ||
}, | ||
}); | ||
|
||
c.queue(httpTarget); | ||
}); | ||
|
||
//it('should skip previous crawled urls', function (done) {}); | ||
}); | ||
}); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
|
||
'use strict'; | ||
|
||
import Crawler from '../dist/index.js'; | ||
import nock from 'nock'; | ||
|
||
// Suite checking that the crawler still emits 'drain' when a request is queued without any callback. | ||
describe('Callback test', function() { | ||
// Clear nock interceptors registered before this suite runs. | ||
before(function() { | ||
nock.cleanAll(); | ||
}); | ||
|
||
let crawler = null; | ||
const url = 'http://www.whatever.com'; | ||
|
||
// Build a fresh crawler per test: no retries, a 100 timeout, and a logger whose `log` does nothing. | ||
beforeEach(() => { | ||
crawler = new Crawler({ | ||
retryTimeout:0, | ||
retries:0, | ||
timeout:100, | ||
logger: { | ||
log:() => {} | ||
}, | ||
}); | ||
}); | ||
|
||
// Release the crawler reference after each test. | ||
afterEach(() => { | ||
crawler = null; | ||
}); | ||
|
||
// Successful HTML response; the test finishes when the crawler emits 'drain'. | ||
it('should end as expected without callback', function(done) { | ||
nock(url) | ||
.get('/get') | ||
.reply(200, '<html></html>',{ | ||
'Content-Type': 'text/html' | ||
}); | ||
|
||
crawler.on('drain', done); | ||
crawler.queue(`${url}/get`); | ||
}); | ||
|
||
// The body is delayed by 500 while the crawler `timeout` is 100; 'drain' must still fire. | ||
// NOTE(review): presumably the delay is meant to trigger the crawler's timeout - confirm. | ||
it('should end as expected without callback when timedout', function(done) { | ||
/* | ||
* TODO: request.js claims that its ETIMEDOUT error covers the time the server takes to send response headers, | ||
* but its source code suggests the timeout is actually tied to the `connect` event on the socket. | ||
*/ | ||
nock(url) | ||
.get('/delay') | ||
//.delay({head:1000}) | ||
//.delayConnection(5000) | ||
.delayBody(500) | ||
//.socketDelay(2000) | ||
.reply(200, '<html></html>',{ | ||
'Content-Type': 'text/html' | ||
}); | ||
|
||
crawler.on('drain', done); | ||
crawler.queue(`${url}/delay`); | ||
}); | ||
|
||
// `_doEncoding` is replaced with a function that always throws; 'drain' must still fire. | ||
it('should end as expected without callback when encoding error', function(done) { | ||
nock(url) | ||
.get('/get') | ||
.reply(200, '<html></html>',{ | ||
'Content-Type': 'text/html' | ||
}); | ||
|
||
crawler._doEncoding = function(){throw new Error('Error for testing.');}; | ||
crawler.on('drain', done); | ||
crawler.queue(`${url}/get`); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/*jshint expr:true */ | ||
'use strict'; | ||
|
||
import Crawler from '../dist/index.js'; | ||
import { expect } from 'chai'; | ||
import sinon from 'sinon'; | ||
// settings for nock to mock http server | ||
import nock from 'nock'; | ||
|
||
// init variables | ||
|
||
// `cb` is a sinon spy that records a label for every hook/event that fires; `crawler` is the instance under test. | ||
let cb; | ||
let crawler; | ||
|
||
// Suite for `crawler.direct()`, checking which instance-level hooks and events it triggers. | ||
describe('Direct feature tests', function () { | ||
|
||
// Clear existing nock interceptors, then register a persistent GET / that replies 200 'ok'. | ||
before(function () { | ||
nock.cleanAll(); | ||
nock('http://test.crawler.com').get('/').reply(200, 'ok').persist(); | ||
}); | ||
|
||
// Fresh spy and crawler per test. The instance-level preRequest, callback and 'request' listener | ||
// each record a label on the spy ('preRequest', 'callback' or 'error', 'Event:request'). | ||
beforeEach(function () { | ||
cb = sinon.spy(); | ||
crawler = new Crawler({ | ||
jQuery: false, | ||
rateLimit: 100, | ||
preRequest: (options, done) => { | ||
cb('preRequest'); | ||
done(); | ||
}, | ||
callback: (err, res, done) => { | ||
if (err) { | ||
cb('error'); | ||
} else { | ||
cb('callback'); | ||
} | ||
done(); | ||
} | ||
}); | ||
crawler.on('request', () => { | ||
cb('Event:request'); | ||
}); | ||
}); | ||
|
||
// A direct request with its own callback must leave the spy untouched. | ||
it('should not trigger preRequest or callback of crawler instance', function (finishTest) { | ||
crawler.direct({ | ||
uri: 'http://test.crawler.com/', | ||
callback: (error, res) => { | ||
expect(error).to.be.null; | ||
expect(res.statusCode).to.equal(200); | ||
expect(res.body).to.equal('ok'); | ||
expect(cb.called).to.be.false; | ||
finishTest(); | ||
} | ||
}); | ||
}); | ||
|
||
// Four requests are queued; the first one's callback fires a direct request. The last queued | ||
// callback checks the exact order of labels recorded on the spy. | ||
it('should be sent directly regardless of current queue of crawler', function (finishTest) { | ||
crawler.queue({ | ||
uri: 'http://test.crawler.com/', | ||
callback: (error, res, done) => { | ||
expect(error).to.be.null; | ||
crawler.direct({ | ||
uri: 'http://test.crawler.com/', | ||
callback: () => { | ||
// Only two labels are expected on the spy at this point; 'direct' becomes the third. | ||
expect(cb.getCalls().length).to.equal(2); | ||
cb('direct'); | ||
} | ||
}); | ||
done(); | ||
} | ||
}); | ||
crawler.queue('http://test.crawler.com/'); | ||
crawler.queue('http://test.crawler.com/'); | ||
crawler.queue({ | ||
uri: 'http://test.crawler.com/', | ||
callback: (error, res, done) => { | ||
expect(error).to.be.null; | ||
// Expected label order: 'direct' sits before the second queued request's 'preRequest'. | ||
let seq = ['preRequest', 'Event:request', 'direct', 'preRequest', 'Event:request', 'callback', 'preRequest', 'Event:request', 'callback', 'preRequest', 'Event:request']; | ||
expect(cb.getCalls().map(c => c.args[0]).join()).to.equal(seq.join()); | ||
expect(cb.getCalls().length).to.equal(11); | ||
done(); | ||
finishTest(); | ||
} | ||
}); | ||
}); | ||
|
||
// Without extra options a direct request must not record 'Event:request' (or anything else) on the spy. | ||
// NOTE(review): the body is identical to the first test in this suite. | ||
it('should not trigger Event:request by default', function (finishTest) { | ||
crawler.direct({ | ||
uri: 'http://test.crawler.com/', | ||
callback: (error, res) => { | ||
expect(error).to.be.null; | ||
expect(res.statusCode).to.equal(200); | ||
expect(res.body).to.equal('ok'); | ||
expect(cb.called).to.be.false; | ||
finishTest(); | ||
} | ||
}); | ||
}); | ||
|
||
// With `skipEventRequest: false` the spy must be called exactly once, with 'Event:request'. | ||
it('should trigger Event:request if specified in options', function (finishTest) { | ||
crawler.direct({ | ||
uri: 'http://test.crawler.com/', | ||
skipEventRequest: false, | ||
callback: (error, res) => { | ||
expect(error).to.be.null; | ||
expect(res.statusCode).to.equal(200); | ||
expect(res.body).to.equal('ok'); | ||
expect(cb.calledOnce).to.be.true; | ||
expect(cb.firstCall.args[0]).to.equal('Event:request'); | ||
finishTest(); | ||
} | ||
}); | ||
}); | ||
}); |
Oops, something went wrong.