Skip to content

Commit

Permalink
step
Browse files Browse the repository at this point in the history
  • Loading branch information
Miniast committed May 6, 2024
1 parent a870239 commit ce447e3
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ node_modules
archive
pnpm-lock.yaml
dist
test
tests
6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "",
"exports": "./index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"test": "npx mocha --timeout=15000 tests/*.test.js"
},
"engines": {
"node": ">=16"
Expand All @@ -25,6 +25,10 @@
"devDependencies": {
"@types/got": "^9.6.12",
"@types/node": "^20.10.6",
"chai": "^5.1.0",
"mocha": "^10.4.0",
"nock": "^13.5.4",
"sinon": "^17.0.1",
"tsx": "^4.7.3"
}
}
36 changes: 21 additions & 15 deletions src/crawler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { EventEmitter } from "events";
import { Cluster } from "./rateLimiter/index.js";
import { isFunction, setDefaults, flattenDeep } from "./lib/utils.js";
import { isBoolean, isFunction, setDefaults, flattenDeep } from "./lib/utils.js";
import { getValidOptions, alignOptions } from "./options.js";
import { logOptions } from "./logger.js";
import type { crawlerOptions, requestOptions } from "./types/crawler.js";
Expand Down Expand Up @@ -56,9 +56,9 @@ class Crawler extends EventEmitter {
];

this._limiters = new Cluster({
maxConnections: this.options.maxConnections,
rateLimit: this.options.rateLimit,
priorityLevels: this.options.priorityLevels,
maxConnections: this.options.maxConnections as number,
rateLimit: this.options.rateLimit as number,
priorityLevels: this.options.priorityLevels as number,
defaultPriority: this.options.priority as number,
homogeneous: this.options.homogeneous,
});
Expand Down Expand Up @@ -110,13 +110,16 @@ class Crawler extends EventEmitter {
}

if (options.html) {
options.url = options.url ?? "";
this._handler(null, options, { body: options.html, headers: { "content-type": "text/html" } });
} else if (typeof options.uri === "function") {
options.uri((uri: any) => {
options.url = uri;
this._execute(options);
});
} else {
options.url = options.url ?? options.uri;
delete options.uri;
this._execute(options);
}
});
Expand All @@ -128,7 +131,7 @@ class Crawler extends EventEmitter {

options.headers = options.headers ?? {};

if (options.forceUTF8 || options.json) options.encoding = null;
if (options.forceUTF8 || options.json) options.encoding = "utf8";

if (options.rotateUA && Array.isArray(options.userAgent)) {
this._rotatingUAIndex = this._rotatingUAIndex % options.userAgent.length;
Expand All @@ -150,24 +153,25 @@ class Crawler extends EventEmitter {
}
}

// @todo skipEventRequest
if (options.skipEventRequest !== true) {
this.emit("request", options)
}

try {
const response = await got(alignOptions({ ...options }));
return this._handler(null, options, response);
} catch (error) {
log.info("error:", error);
log.error("error:", error);
return this._handler(error, options);
}
};

private _handler = (error: any | null, options: requestOptions, response?: any): any => {
if (error) {
log.info(
`Error: ${error} when fetching ${options.url} ${options.retries ? `(${options.retries} retries left)` : ""
}`
log.error(
`${error} when fetching ${options.url} ${options.retries ? `(${options.retries} retries left)` : ""}`
);
if (options.retries) {
if (options.retries && options.retries > 0) {
setTimeout(() => {
options.retries!--;
this._execute(options as crawlerOptions);
Expand Down Expand Up @@ -221,25 +225,27 @@ class Crawler extends EventEmitter {
return 0;
}

public send = async (options: requestOptions): Promise<any> => {
public send = async (options: string | requestOptions): Promise<any> => {
options = getValidOptions(options) as requestOptions;
options.retries = options.retries ?? 0;
setDefaults(options, this.options);
this.globalOnlyOptions.forEach(globalOnlyOption => {
delete (options as any)[globalOnlyOption];
});
options.skipEventRequest = isBoolean(options.skipEventRequest) ? options.skipEventRequest : true;
delete options.preRequest;
return await this._execute(options as crawlerOptions);
};
/**
* @deprecated
* @description Old interface version. It is recommended to use `Crawler.send()` instead.
* @see Crawler.send
*/
public direct = async (options: requestOptions): Promise<any> => {
public direct = async (options: string | requestOptions): Promise<any> => {
return await this.send(options);
};

public add = async (options: requestOptions | requestOptions[]): Promise<void> => {
public add = async (options: string | requestOptions | requestOptions[]): Promise<void> => {
let optionsArray = Array.isArray(options) ? options : [options];
optionsArray = flattenDeep(optionsArray);
optionsArray.forEach(options => {
Expand Down Expand Up @@ -273,7 +279,7 @@ class Crawler extends EventEmitter {
* @description Old interface version. It is recommended to use `Crawler.add()` instead.
* @see Crawler.add
*/
public queue = async (options: requestOptions | requestOptions[]): Promise<void> => {
public queue = async (options: string | requestOptions | requestOptions[]): Promise<void> => {
return this.add(options);
};
}
Expand Down
1 change: 1 addition & 0 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export const isNumber = (value: unknown): boolean => getType(value) === "number"

/**
 * Checks whether `getType` reports the given value as "function".
 * @param value - value to classify
 * @returns `true` when `getType(value)` equals "function", otherwise `false`
 */
export const isFunction = (value: unknown): boolean => {
    const kind = getType(value);
    return kind === "function";
};

/**
 * Checks whether `getType` reports the given value as "boolean".
 * @param value - value to classify
 * @returns `true` when `getType(value)` equals "boolean", otherwise `false`
 */
export const isBoolean = (value: unknown): boolean => {
    const kind = getType(value);
    return kind === "boolean";
};
/**
* @param target
* @param source
Expand Down
2 changes: 1 addition & 1 deletion src/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export const logOptions = {
ERROR: ["bold", "red"],
FATAL: ["bold", "redBright"],
},
name: ["bold", "blue"],
name: ["bold", "green"],
dateIsoStr: "white",
filePathWithLine: "white",
nameWithDelimiterPrefix: ["white", "bold"],
Expand Down
5 changes: 3 additions & 2 deletions src/options.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { HttpProxyAgent, HttpsProxyAgent } from "hpagent";
import { proxies as Http2Proxies } from "http2-wrapper";
import http2Wrapper from "http2-wrapper";
import { cleanObject, getType, isValidUrl } from "./lib/utils.js";

export const getValidOptions = (options: unknown): Object => {
Expand Down Expand Up @@ -33,7 +33,7 @@ export const alignOptions = (options: any): any => {
"callback",
"release",
];
const deprecatedOptions = ["uri", "qs", "strictSSL", "gzip", "jar", "jsonReviver", "jsonReplacer"].concat(
const deprecatedOptions = ["uri", "qs", "strictSSL", "gzip", "jar", "jsonReviver", "jsonReplacer", "json", "skipEventRequest"].concat(
crawlerOnlyOptions
);
const defaultagent = {
Expand All @@ -58,6 +58,7 @@ export const alignOptions = (options: any): any => {

// http2 proxy
if (options.http2 === true && options.proxy) {
const { proxies: Http2Proxies } = http2Wrapper;
const protocol = options.proxy.startsWith("https") ? "https" : "http";
const http2Agent =
protocol === "https"
Expand Down
2 changes: 1 addition & 1 deletion src/types/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,6 @@ type requestOptions = {
callback?: (error: any, response: unknown, done: unknown) => void;
};

type crawlerOptions = globalOnlyOptions & requestOptions;
type crawlerOptions = Partial<globalOnlyOptions> & requestOptions;

export { crawlerOptions, requestOptions };
21 changes: 21 additions & 0 deletions test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// import Crawler from "./src/index.js";
// const c = new Crawler({
// jQuery: false,
// skipDuplicates: true,
// callback: function (error, result) {
// console.log("test")
// // expect(error).to.be.null;
// // expect(result.statusCode).to.equal(200);
// // expect(call.isDone()).to.be.true;
// // done();
// },
// });

// c.queue('http://target.com');
// Scratch check of `delete` semantics: deleting a key that was never set is a
// silent no-op, deleting an existing key removes it.
// The keys that get deleted are declared optional so the snippet type-checks
// under `strict`: the inferred literal type has no `c` (TS2339) and treats
// `a` as required, which `delete` rejects (TS2790).
const a: { a?: number; b: number; c?: number } = {
    a: 1,
    b: 2,
};
delete a.c; // key absent: nothing happens at runtime
delete a.a; // key present: removed from the object
console.log(a); // prints { b: 2 }

0 comments on commit ce447e3

Please sign in to comment.