Skip to content

Commit

Permalink
step
Browse files Browse the repository at this point in the history
  • Loading branch information
Miniast committed Sep 27, 2023
1 parent a39c632 commit e688b17
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 53 deletions.
81 changes: 42 additions & 39 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,22 @@ import * as http2 from "http2";
import path from "path";
import util from "util";
import logger from "./logger.js";

import Bottleneck from "bottleneck";
import seenreq from "seenreq";

import { RateLimiter, Cluster } from "./rateLimiter/index.js";
import { getType } from "./lib/utils.js";
import type { crawlerOptions } from "./types/crawler.js";

/**
 * Placeholder for content-type normalization; the body is not implemented yet.
 *
 * @param contentType - Content-type string; currently unused because the body is empty.
 */
const normalizeContentType = (contentType: string) => {
//@todo
};
/** Placeholder that accepts crawler options and currently does nothing with them. */
const crawler = (options: crawlerOptions) => {};
// 导入依赖库
/**
 * Returns the built-in type tag of `obj` with its original capitalisation,
 * e.g. `"Array"` for an array or `"Null"` for `null`.
 */
const getType = (obj: any): string => {
  // Object.prototype.toString yields "[object Xxx]"; keep only the "Xxx" part.
  const tagged: string = Object.prototype.toString.call(obj);
  return tagged.slice(8, -1);
};


// 定义 Crawler 类
class Crawler {
private options: crawlerOptions;
private globalOnlyOptions: string[];
private _limiters: Cluster;

public options: crawlerOptions;
public globalOnlyOptions: string[];
// private limiters: Bottleneck.Cluster;
// private http2Connections: Record<string, any>;
// //private log: (level: string, message: string) => void;
Expand All @@ -39,7 +37,7 @@ class Crawler {
maxConnections: 10,
method: "GET",
priority: 5,
priorityRange: 10,
priorityCount: 10,
rateLimit: 0,
referer: false,
retries: 3,
Expand All @@ -54,38 +52,43 @@ class Crawler {

this.globalOnlyOptions = ["skipDuplicates", "rotateUA"];

// this.limiters = new Bottleneck.Cluster(
// this.options.maxConnections,
// this.options.rateLimit,
// this.options.priorityRange,
// this.options.priority,
// this.options.homogeneous
// );

// this.http2Connections = {};

const level = this.options.debug ? "debug" : "info";

// this.seen = new seenreq(this.options.seenreq);
// this.seen
// .initialize()
// .then(() => this.log("debug", "seenreq is initialized."))
// .catch(e => this.log("error", e));

// this.on("_release", () => {
// this.log("debug", `Queue size: ${this.queueSize}`);

// if (this.limiters.empty) {
// if (Object.keys(this.http2Connections).length > 0) {
// this._clearHttp2Session();
// }
// this.emit("drain");
// }
// });
this._limiters = new Cluster({
maxConnections: this.options.maxConnections,
rateLimit: this.options.rateLimit,
priorityCount: this.options.priorityCount,
defaultPriority: this.options.priority,
homogeneous: this.options.homogeneous,
});

this.on("_release", () => {
this.log("debug", `Queue size: ${this.queueSize}`);

if (this.limiters.empty) {
if (Object.keys(this.http2Connections).length > 0) {
this._clearHttp2Session();
}
this.emit("drain");
}
});
}
public run = (options: crawlerOptions): void => {
isPlainObject(options);
/**
 * Checks whether `options` is usable as crawler options.
 *
 * A value is accepted when it is a plain object (its prototype is
 * `Object.prototype` or `null`), or when it is a JSON string that decodes
 * to such a plain object.
 *
 * @param options - Candidate value of any type.
 * @returns `true` when `options` is, or parses to, a plain object; otherwise `false`.
 */
private _isValidOptions = (options: unknown): boolean => {
  // Plain objects come from `{}` / `new Object()` or from `Object.create(null)`.
  const isPlainObject = (value: unknown): boolean => {
    if (getType(value) !== "object") {
      return false;
    }
    const prototype: unknown = Object.getPrototypeOf(value);
    return prototype === Object.prototype || prototype === null;
  };

  if (getType(options) === "string") {
    try {
      // Validate the decoded value itself: parsable JSON such as "123", "[1]"
      // or "null" is not an options object and must be rejected.
      return isPlainObject(JSON.parse(options as string));
    } catch {
      // Not valid JSON at all.
      return false;
    }
  }

  return isPlainObject(options);
};
// public run = (options: crawlerOptions): void => {
// if()
// };
// 添加 emit 方法
private emit(event: string): void {
// 实现事件触发逻辑
Expand Down
8 changes: 6 additions & 2 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
export const getType = (value: any): string => Object.prototype.toString.call(value).slice(8, -1).toLocaleLowerCase();
export const isNumber = (value: any): boolean => getType(value) === "number" && !isNaN(value);
/**
 * Returns the built-in type tag of `value` as a lower-case string.
 *
 * The tag is taken from `Object.prototype.toString`, so `"[object Array]"`
 * yields `"array"` and `"[object Null]"` yields `"null"`.
 *
 * @param value - Any value, including `null` and `undefined`.
 * @returns The lower-cased tag, e.g. `"string"`, `"object"`, `"null"`, `"bigint"`.
 */
export const getType = (value: unknown): string =>
  // Use toLowerCase, not toLocaleLowerCase: the result must not depend on the
  // host locale (a Turkish locale lower-cases "I" to a dotless "ı").
  Object.prototype.toString.call(value).slice(8, -1).toLowerCase();
/**
 * Tells whether `value` is a number other than `NaN`.
 *
 * @param value - Any value.
 * @returns `true` when `getType(value)` is `"number"` and the value is not `NaN`.
 */
export const isNumber = (value: unknown): boolean => {
  if (getType(value) !== "number") {
    return false;
  }
  // Global isNaN (with coercion) is kept deliberately so results stay the same for boxed Number values.
  return !isNaN(value as number);
};
8 changes: 4 additions & 4 deletions src/rateLimiter/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ class Cluster {
private _homogeneous: boolean;
private _interval: NodeJS.Timeout | null = null;

public globalMaxConcurrency: number;
public globalMaxConnections: number;
public globalRateLimit: number;
public globalPriorityCount: number;
public globalDefaultPriority: number;

constructor({ maxConcurrency, rateLimit, priorityCount, defaultPriority, homogeneous }: ClusterOptions) {
this.globalMaxConcurrency = maxConcurrency;
constructor({ maxConnections, rateLimit, priorityCount, defaultPriority, homogeneous }: ClusterOptions) {
this.globalMaxConnections = maxConnections;
this.globalRateLimit = rateLimit;
this.globalPriorityCount = priorityCount;
this.globalDefaultPriority = defaultPriority;
Expand All @@ -27,7 +27,7 @@ class Cluster {
createRateLimiter(id: string = ""): RateLimiter | undefined {
if (!this._rateLimiters[id]) {
this._rateLimiters[id] = new RateLimiter({
"maxConcurrency": this.globalMaxConcurrency,
"maxConnections": this.globalMaxConnections,
"rateLimit": this.globalRateLimit,
"priorityCount": this.globalPriorityCount,
"defaultPriority": this.globalDefaultPriority,
Expand Down
14 changes: 7 additions & 7 deletions src/rateLimiter/rateLimiter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export interface TaskWrapper {
}

export interface RateLimiterOptions {
maxConcurrency: number;
maxConnections: number;
rateLimit: number;
priorityCount: number;
defaultPriority: number;
Expand All @@ -23,18 +23,18 @@ class RateLimiter {
private _cluster?: Cluster;

public id?: string;
public maxConcurrency: number;
public maxConnections: number;
public nextRequestTime: number;
public rateLimit: number;
public runningSize: number;
public priorityCount: number;
public defaultPriority: number;

constructor({ maxConcurrency, rateLimit, priorityCount = 1, defaultPriority = 0, cluster }: RateLimiterOptions) {
if (!Number.isInteger(maxConcurrency) || !Number.isInteger(rateLimit) || !Number.isInteger(priorityCount)) {
throw new Error("maxConcurrency, rateLimit and priorityCount must be positive integers");
constructor({ maxConnections, rateLimit, priorityCount = 1, defaultPriority = 0, cluster }: RateLimiterOptions) {
if (!Number.isInteger(maxConnections) || !Number.isInteger(rateLimit) || !Number.isInteger(priorityCount)) {
throw new Error("maxConnections, rateLimit and priorityCount must be positive integers");
}
this.maxConcurrency = maxConcurrency;
this.maxConnections = maxConnections;
this.priorityCount = priorityCount;
this.defaultPriority = Number.isInteger(defaultPriority) ? defaultPriority : Math.floor(this.priorityCount / 2);
this.defaultPriority >= priorityCount ? priorityCount - 1 : defaultPriority;
Expand Down Expand Up @@ -75,7 +75,7 @@ class RateLimiter {
}

private _schedule_old(): void {
if (this.runningSize < this.maxConcurrency && this.hasWaitingTasks()) {
if (this.runningSize < this.maxConnections && this.hasWaitingTasks()) {
++this.runningSize;
const delay = Math.max(this.nextRequestTime - Date.now(), 0);
this.nextRequestTime = Date.now() + delay + this.rateLimit;
Expand Down
5 changes: 5 additions & 0 deletions src/test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Scratch check: print the prototype of Object.prototype.
const a = Object.getPrototypeOf(Object.prototype);
// Sample plain object literal; it is not used below.
const b = { a: 1 };
console.log(a);
2 changes: 1 addition & 1 deletion src/types/crawler.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ interface crawlerOptions {
maxConnections: number;
method: string;
priority: number;
priorityRange: number;
priorityCount: number;
rateLimit: number;
referer: boolean;
retries: number;
Expand Down

0 comments on commit e688b17

Please sign in to comment.