Skip to content

Commit

Permalink
milestone
Browse files Browse the repository at this point in the history
  • Loading branch information
Miniast committed May 8, 2024
1 parent 0ddb502 commit e2682f8
Show file tree
Hide file tree
Showing 24 changed files with 2,565 additions and 43 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ node_modules
.vscode
archive
pnpm-lock.yaml
dist
tests
dist
4 changes: 1 addition & 3 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ import iconv from "iconv-lite";
import { Logger } from "tslog";

process.env.NODE_ENV = process.env.NODE_ENV ?? process.argv[2];
// test
import fs from "fs";
// process.env.NODE_ENV = "debug";
//

logOptions.minLevel = process.env.NODE_ENV === "debug" ? 0 : 3;
const log = new Logger(logOptions);

Expand Down
24 changes: 0 additions & 24 deletions test.json

This file was deleted.

14 changes: 0 additions & 14 deletions test.ts

This file was deleted.

75 changes: 75 additions & 0 deletions tests/cacheOption.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*jshint expr:true */
'use strict';

import Crawler from '../dist/index.js';
import { expect } from 'chai';
import nock from 'nock';

const httpTarget = 'http://target.com';
let c;
let scope;

/**
 * Crawler behaviour with `skipDuplicates: true`, exercised against a
 * nock-mocked `httpTarget` so no real network traffic is made.
 *
 * Each test registers its own interceptor on a fresh nock scope and finishes
 * from inside the crawler callback once the expectations have run.
 */
describe('Cache features tests', function () {
    describe('Skip Duplicate active', function () {
        beforeEach(function () {
            // Reuse the shared constant so the mocked host and the queued URL
            // cannot drift apart.
            scope = nock(httpTarget);
        });

        afterEach(function () {
            c = {};
            // Remove any interceptor a test left registered (for example the
            // persisted error reply in the retry test) so it cannot answer
            // requests issued by later tests against the same host.
            nock.cleanAll();
        });

        it('should not skip one single url', function (done) {
            const call = scope.get('/').reply(200);
            c = new Crawler({
                jQuery: false,
                skipDuplicates: true,
                callback: function (error, result) {
                    expect(error).to.be.null;
                    expect(result.statusCode).to.equal(200);
                    expect(call.isDone()).to.be.true;
                    done();
                },
            });

            c.queue(httpTarget);
        });

        it('should notify the callback when an error occurs and "retries" is disabled', function (done) {
            const koScope = scope.get('/').replyWithError('too bad');
            c = new Crawler({
                jQuery: false,
                skipDuplicates: true,
                retries: 0,
                callback: function (error) {
                    expect(error).to.exist;
                    expect(koScope.isDone()).to.be.true;
                    done();
                },
            });

            c.queue(httpTarget);
        });

        it('should retry and notify the callback when an error occurs and "retries" is enabled', function (done) {
            // Persist the failing reply so the retried request is answered
            // with the same error as the first attempt.
            const koScope = scope.get('/').replyWithError('too bad').persist();

            c = new Crawler({
                jQuery: false,
                skipDuplicates: true,
                retries: 1,
                retryTimeout: 10,
                callback: function (error) {
                    expect(error).to.exist;
                    expect(koScope.isDone()).to.be.true;
                    scope.persist(false);
                    done();
                },
            });

            c.queue(httpTarget);
        });

        // TODO: add a test for 'should skip previous crawled urls'.
    });
});

71 changes: 71 additions & 0 deletions tests/callback.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

'use strict';

import Crawler from '../dist/index.js';
import nock from 'nock';

/**
 * Verifies that a crawler whose requests carry no callback still emits
 * `drain` after a normal response, after a response whose body is delayed
 * past the configured timeout, and after the encoding step throws.
 */
describe('Callback test', function () {
    const url = 'http://www.whatever.com';
    let crawler = null;

    // Completes a nock interceptor with the empty HTML page every test serves.
    const replyWithEmptyPage = (interceptor) =>
        interceptor.reply(200, '<html></html>', {
            'Content-Type': 'text/html',
        });

    // Subscribes `done` to `drain` first, then queues `path` on the mocked host.
    const queueAndAwaitDrain = (path, done) => {
        crawler.on('drain', done);
        crawler.queue(`${url}${path}`);
    };

    before(() => {
        nock.cleanAll();
    });

    beforeEach(() => {
        // No retries and a short timeout keep every scenario quick; the
        // logger is replaced with a no-op.
        const options = {
            retryTimeout: 0,
            retries: 0,
            timeout: 100,
            logger: {
                log: () => {},
            },
        };
        crawler = new Crawler(options);
    });

    afterEach(() => {
        crawler = null;
    });

    it('should end as expected without callback', function (done) {
        replyWithEmptyPage(nock(url).get('/get'));

        queueAndAwaitDrain('/get', done);
    });

    it('should end as expected without callback when timedout', function (done) {
        /*
         * TODO: request.js claims its ETIMEDOUT error covers the time the
         * server takes to send response headers, but its source code ties
         * it to the socket `connect` event instead.
         *
         * Alternatives left commented out by the original author:
         *   .delay({head:1000}), .delayConnection(5000), .socketDelay(2000)
         */
        const delayedInterceptor = nock(url).get('/delay').delayBody(500);
        replyWithEmptyPage(delayedInterceptor);

        queueAndAwaitDrain('/delay', done);
    });

    it('should end as expected without callback when encoding error', function (done) {
        replyWithEmptyPage(nock(url).get('/get'));

        // Force the encoding step to fail for this crawler instance.
        crawler._doEncoding = function () {
            throw new Error('Error for testing.');
        };

        queueAndAwaitDrain('/get', done);
    });
});
115 changes: 115 additions & 0 deletions tests/direct.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*jshint expr:true */
'use strict';

import Crawler from '../dist/index.js';
import { expect } from 'chai';
import sinon from 'sinon';
// settings for nock to mock http server
import nock from 'nock';

// init variables

let cb;
let crawler;

// Tests for `crawler.direct()`. A sinon spy (`cb`) records a label each time
// an instance-level hook (preRequest, callback, 'request' event) fires, so
// each test can assert which hooks ran and in what order.
describe('Direct feature tests', function () {

// Start from a clean nock state, then answer every GET / on the mocked host
// with 200 'ok'; the interceptor is persisted so all tests can reuse it.
before(function () {
nock.cleanAll();
nock('http://test.crawler.com').get('/').reply(200, 'ok').persist();
});

// Fresh spy and crawler for every test. The instance-level preRequest and
// callback options and the 'request' event listener each push a label onto
// the spy.
beforeEach(function () {
cb = sinon.spy();
crawler = new Crawler({
jQuery: false,
rateLimit: 100,
preRequest: (options, done) => {
cb('preRequest');
done();
},
callback: (err, res, done) => {
if (err) {
cb('error');
} else {
cb('callback');
}
done();
}
});
crawler.on('request', () => {
cb('Event:request');
});
});

// direct() with its own callback: the response is checked and the spy must
// not have been called at all.
it('should not trigger preRequest or callback of crawler instance', function (finishTest) {
crawler.direct({
uri: 'http://test.crawler.com/',
callback: (error, res) => {
expect(error).to.be.null;
expect(res.statusCode).to.equal(200);
expect(res.body).to.equal('ok');
expect(cb.called).to.be.false;
finishTest();
}
});
});

// Queues four requests. The first one's callback issues a direct() request
// whose own callback records 'direct'; the last one's callback checks the
// full sequence of labels recorded on the spy.
it('should be sent directly regardless of current queue of crawler', function (finishTest) {
crawler.queue({
uri: 'http://test.crawler.com/',
callback: (error, res, done) => {
expect(error).to.be.null;
crawler.direct({
uri: 'http://test.crawler.com/',
callback: () => {
// Only the first queued request's preRequest and 'request' event are
// expected to have been recorded at this point.
expect(cb.getCalls().length).to.equal(2);
cb('direct');
}
});
done();
}
});
crawler.queue('http://test.crawler.com/');
crawler.queue('http://test.crawler.com/');
crawler.queue({
uri: 'http://test.crawler.com/',
callback: (error, res, done) => {
expect(error).to.be.null;
// Expected labels: request 1 (preRequest, Event:request), 'direct',
// requests 2 and 3 (preRequest, Event:request, callback each), then
// request 4 (preRequest, Event:request). Requests 1 and 4 pass their own
// callback, which never records 'callback'.
// NOTE(review): this ordering presumably relies on `rateLimit: 100`
// processing the queued requests one at a time — confirm against the crawler.
let seq = ['preRequest', 'Event:request', 'direct', 'preRequest', 'Event:request', 'callback', 'preRequest', 'Event:request', 'callback', 'preRequest', 'Event:request'];
expect(cb.getCalls().map(c => c.args[0]).join()).to.equal(seq.join());
expect(cb.getCalls().length).to.equal(11);
done();
finishTest();
}
});
});

// Same request and assertions as the first test in this suite: with no extra
// options, direct() leaves the spy uncalled, so no 'Event:request' label is
// recorded.
it('should not trigger Event:request by default', function (finishTest) {
crawler.direct({
uri: 'http://test.crawler.com/',
callback: (error, res) => {
expect(error).to.be.null;
expect(res.statusCode).to.equal(200);
expect(res.body).to.equal('ok');
expect(cb.called).to.be.false;
finishTest();
}
});
});

// With `skipEventRequest: false` the spy is expected to be called exactly
// once, with the 'Event:request' label.
it('should trigger Event:request if specified in options', function (finishTest) {
crawler.direct({
uri: 'http://test.crawler.com/',
skipEventRequest: false,
callback: (error, res) => {
expect(error).to.be.null;
expect(res.statusCode).to.equal(200);
expect(res.body).to.equal('ok');
expect(cb.calledOnce).to.be.true;
expect(cb.firstCall.args[0]).to.equal('Event:request');
finishTest();
}
});
});
});
Loading

0 comments on commit e2682f8

Please sign in to comment.