Skip to content

Commit

Permalink
V4.6.6-1 (labring#656)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu authored Dec 27, 2023
1 parent 86286ef commit 759a233
Show file tree
Hide file tree
Showing 182 changed files with 3,084 additions and 81,670 deletions.
22 changes: 12 additions & 10 deletions docSite/content/docs/development/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ weight: 708

**使用时,请务必去除注释!**

以下配置适用于V4.6.6-alpha版本以后

```json
{
"SystemParams": {
"systemEnv": {
"pluginBaseUrl": "", // 商业版接口地址
"vectorMaxProcess": 15, // 向量生成最大进程,结合数据库性能和 key 来设置
"qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置
"pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢
},
"ChatModels": [ // 对话模型
"chatModels": [ // 对话模型
{
"model": "gpt-3.5-turbo-1106",
"name": "GPT35-1106",
Expand Down Expand Up @@ -76,7 +78,7 @@ weight: 708
"defaultSystemChatPrompt": ""
}
],
"QAModels": [ // QA 生成模型
"qaModels": [ // QA 生成模型
{
"model": "gpt-3.5-turbo-16k",
"name": "GPT35-16k",
Expand All @@ -85,7 +87,7 @@ weight: 708
"price": 0
}
],
"CQModels": [ // 问题分类模型
"cqModels": [ // 问题分类模型
{
"model": "gpt-3.5-turbo-1106",
"name": "GPT35-1106",
Expand All @@ -105,7 +107,7 @@ weight: 708
"functionPrompt": ""
}
],
"ExtractModels": [ // 内容提取模型
"extractModels": [ // 内容提取模型
{
"model": "gpt-3.5-turbo-1106",
"name": "GPT35-1106",
Expand All @@ -116,7 +118,7 @@ weight: 708
"functionPrompt": ""
}
],
"QGModels": [ // 生成下一步指引
"qgModels": [ // 生成下一步指引
{
"model": "gpt-3.5-turbo-1106",
"name": "GPT35-1106",
Expand All @@ -125,7 +127,7 @@ weight: 708
"price": 0
}
],
"VectorModels": [ // 向量模型
"vectorModels": [ // 向量模型
{
"model": "text-embedding-ada-002",
"name": "Embedding-2",
Expand All @@ -134,8 +136,8 @@ weight: 708
"maxToken": 3000
}
],
"ReRankModels": [], // 重排模型,暂时填空数组
"AudioSpeechModels": [
"reRankModels": [], // 重排模型,暂时填空数组
"audioSpeechModels": [
{
"model": "tts-1",
"name": "OpenAI TTS1",
Expand All @@ -152,7 +154,7 @@ weight: 708
]
}
],
"WhisperModel": {
"whisperModel": {
"model": "whisper-1",
"name": "Whisper1",
"price": 0
Expand Down
2 changes: 1 addition & 1 deletion docSite/content/docs/development/upgrading/465.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ weight: 831

## 配置文件变更

由于 openai 已开始启用 function call,改为 toolChoice。FastGPT 同步的修改了对于的配置和调用方式,需要对配置文件做一些修改:
由于 openai 已开始弃用 function call,改为 toolChoice。FastGPT 同步修改了对应的配置和调用方式,需要对配置文件做一些修改:

[点击查看最新的配置文件](/docs/development/configuration/)

Expand Down
22 changes: 22 additions & 0 deletions docSite/content/docs/development/upgrading/466.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
title: 'V4.6.6(需要改配置文件)'
description: 'FastGPT V4.6.6'
icon: 'upgrade'
draft: false
toc: true
weight: 831
---

**版本仍在开发中……**

## 配置文件变更

为了减少代码重复度,我们对配置文件做了一些修改:[点击查看最新的配置文件](/docs/development/configuration/)



## V4.6.6 即将更新

1. UI 优化,未来将逐步替换为新的 UI 设计。


10 changes: 6 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
"prepare": "husky install",
"format-code": "prettier --config \"./.prettierrc.js\" --write \"./**/src/**/*.{ts,tsx,scss}\"",
"format-doc": "zhlint --dir ./docSite *.md --fix",
"gen:theme-typings": "chakra-cli tokens projects/app/src/web/styles/theme.ts --out node_modules/.pnpm/node_modules/@chakra-ui/styled-system/dist/theming.types.d.ts",
"postinstall": "sh ./scripts/postinstall.sh"
},
"devDependencies": {
"@chakra-ui/cli": "^2.4.1",
"husky": "^8.0.3",
"lint-staged": "^13.2.1",
"prettier": "^3.0.3",
"zhlint": "^0.7.1",
"i18next": "^22.5.1",
"lint-staged": "^13.2.1",
"next-i18next": "^13.3.0",
"react-i18next": "^12.3.1"
"prettier": "^3.0.3",
"react-i18next": "^12.3.1",
"zhlint": "^0.7.1"
},
"lint-staged": {
"./**/**/*.{ts,tsx,scss}": "npm run format-code",
Expand Down
62 changes: 62 additions & 0 deletions packages/global/common/file/read/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* read file to txt */
import * as pdfjsLib from 'pdfjs-dist';

/**
 * Extract plain text from a PDF using pdfjs-dist.
 *
 * @param pdf - anything `pdfjsLib.getDocument` accepts: a URL string, a URL
 *              object, or the raw file bytes as an ArrayBuffer.
 * @returns the concatenated text of all pages, with a newline appended after
 *          tokens that look like paragraph ends.
 */
export const readPdfFile = async ({ pdf }: { pdf: string | URL | ArrayBuffer }) => {
  // pdf.js requires a worker script; this path is served by the app
  // (presumably from its public assets — TODO confirm against deployment).
  pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';

  // Shape of one text item returned by page.getTextContent().
  type TokenType = {
    str: string; // the text run itself
    dir: string; // writing direction
    width: number;
    height: number;
    transform: number[]; // transform[5] is the token's y position on the page
    fontName: string;
    hasEOL: boolean; // true when the token ends a visual line
  };

  // Read a single page and join its text tokens, skipping header/footer bands.
  const readPDFPage = async (doc: any, pageNo: number) => {
    const page = await doc.getPage(pageNo);
    const tokenizedText = await page.getTextContent();

    const viewport = page.getViewport({ scale: 1 });
    const pageHeight = viewport.height;
    const headerThreshold = pageHeight * 0.07; // assume the page header sits in the top 7% of the page
    const footerThreshold = pageHeight * 0.93; // assume the page footer sits in the bottom 7% of the page

    // Keep tokens positioned between the header and footer bands. Tokens with
    // no transform cannot be positioned, so they are kept as well.
    const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
      return (
        !token.transform ||
        (token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
      );
    });

    // concat empty string 'hasEOL': fold an empty token's line-break flag into
    // the previous token, then drop the empty token (i-- compensates for splice).
    for (let i = 0; i < pageTexts.length; i++) {
      const item = pageTexts[i];
      if (item.str === '' && pageTexts[i - 1]) {
        pageTexts[i - 1].hasEOL = item.hasEOL;
        pageTexts.splice(i, 1);
        i--;
      }
    }

    // Release page resources held by pdf.js.
    page.cleanup();

    return pageTexts
      .map((token) => {
        // A token that both ends a line and ends with sentence punctuation is
        // treated as a paragraph boundary: emit a newline after it.
        const paragraphEnd = token.hasEOL && /([.?!\n\r]|(\r\n))$/.test(token.str);

        return paragraphEnd ? `${token.str}\n` : token.str;
      })
      .join('');
  };

  const doc = await pdfjsLib.getDocument(pdf).promise;
  // Start all page reads, then await them together; Promise.all preserves
  // input order, so pages are concatenated in document order.
  const pageTextPromises = [];
  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
    pageTextPromises.push(readPDFPage(doc, pageNo));
  }
  const pageTexts = await Promise.all(pageTextPromises);

  return pageTexts.join('');
};
38 changes: 38 additions & 0 deletions packages/global/common/string/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,41 @@ export const simpleMarkdownText = (rawText: string) => {

return rawText.trim();
};

/**
 * Format markdown text:
 * 1. Upload every inline base64 image via `uploadImgController` and replace
 *    the data URI with the returned URL; failed uploads are stripped along
 *    with the now-empty image tag.
 * 2. Trim trailing whitespace after image tags.
 * 3. Run the shared `simpleMarkdownText` cleanup on the result.
 *
 * @param rawText - markdown that may contain `![alt](data:image/...;base64,...)` images
 * @param uploadImgController - uploads one base64 data URI, resolves to its hosted URL
 * @returns the cleaned markdown text
 */
export const uploadMarkdownBase64 = async ({
  rawText,
  uploadImgController
}: {
  rawText: string;
  uploadImgController: (base64: string) => Promise<string>;
}) => {
  // Match inline base64 image data URIs. ')' is excluded so the match stops
  // at the closing parenthesis of the markdown image syntax.
  const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
  const base64Arr = rawText.match(base64Regex) || [];

  // Upload each base64 image and swap it for the hosted URL.
  await Promise.all(
    base64Arr.map(async (base64Img) => {
      try {
        const str = await uploadImgController(base64Img);

        rawText = rawText.replace(base64Img, str);
      } catch (error) {
        // Upload failed: drop the data URI, then remove the empty image tag.
        rawText = rawText.replace(base64Img, '');
        rawText = rawText.replace(/!\[.*\]\(\)/g, '');
      }
    })
  );

  // Remove whitespace after image tags. Replace unconditionally: guarding it
  // with `test()` on a /g regex is stateful (lastIndex advances between calls)
  // and adds nothing — replace is a no-op when nothing matches.
  rawText = rawText.replace(/(!\[.*\]\(.*\))\s*/g, '$1');

  return simpleMarkdownText(rawText);
};
20 changes: 13 additions & 7 deletions packages/global/common/string/textSplitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export const splitText2Chunks = (props: {

// The larger maxLen is, the next sentence is less likely to trigger splitting
const stepReges: { reg: RegExp; maxLen: number }[] = [
...customReg.map((text) => ({ reg: new RegExp(`([${text}])`, 'g'), maxLen: chunkLen * 1.4 })),
...customReg.map((text) => ({ reg: new RegExp(`(${text})`, 'g'), maxLen: chunkLen * 1.4 })),
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
Expand Down Expand Up @@ -64,13 +64,22 @@ export const splitText2Chunks = (props: {
}
];
}

const isCustomSteep = checkIsCustomStep(step);
const isMarkdownSplit = checkIsMarkdownSplit(step);
const independentChunk = checkIndependentChunk(step);

const { reg } = stepReges[step];

const splitTexts = text
.replace(reg, independentChunk ? `${splitMarker}$1` : `$1${splitMarker}`)
.replace(
reg,
(() => {
if (isCustomSteep) return splitMarker;
if (independentChunk) return `${splitMarker}$1`;
return `$1${splitMarker}`;
})()
)
.split(`${splitMarker}`)
.filter((part) => part.trim());

Expand Down Expand Up @@ -128,11 +137,6 @@ export const splitText2Chunks = (props: {
const independentChunk = checkIndependentChunk(step);
const isCustomStep = checkIsCustomStep(step);

// mini text
if (text.length <= chunkLen) {
return [text];
}

// oversize
if (step >= stepReges.length) {
if (text.length < chunkLen * 3) {
Expand Down Expand Up @@ -221,6 +225,8 @@ export const splitText2Chunks = (props: {
} else {
chunks.push(`${mdTitle}${lastText}`);
}
} else if (lastText && chunks.length === 0) {
chunks.push(lastText);
}

return chunks;
Expand Down
29 changes: 27 additions & 2 deletions packages/global/common/system/types/index.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
export type FeConfigsType = {
import type {
  ChatModelItemType,
  FunctionModelItemType,
  LLMModelItemType,
  VectorModelItemType,
  AudioSpeechModelType, // was `AudioSpeechModels` — the type used below is `AudioSpeechModelType`
  WhisperModelType,
  ReRankModelItemType
} from '../../../core/ai/model.d';

/* fastgpt main config file shape (the parsed config.json) */
export type FastGPTConfigFileType = {
  feConfigs: FastGPTFeConfigsType;
  systemEnv: SystemEnvType;
  chatModels: ChatModelItemType[];
  qaModels: LLMModelItemType[];
  cqModels: FunctionModelItemType[];
  extractModels: FunctionModelItemType[];
  qgModels: LLMModelItemType[];
  vectorModels: VectorModelItemType[];
  reRankModels: ReRankModelItemType[];
  audioSpeechModels: AudioSpeechModelType[];
  whisperModel: WhisperModelType;
};

export type FastGPTFeConfigsType = {
show_emptyChat?: boolean;
show_register?: boolean;
show_appStore?: boolean;
Expand Down Expand Up @@ -34,6 +59,6 @@ export type SystemEnvType = {
};

declare global {
var feConfigs: FeConfigsType;
var feConfigs: FastGPTFeConfigsType;
var systemEnv: SystemEnvType;
}
1 change: 1 addition & 0 deletions packages/global/core/ai/model.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export type VectorModelItemType = {
defaultToken: number;
price: number;
maxToken: number;
weight: number;
};

export type ReRankModelItemType = {
Expand Down
3 changes: 2 additions & 1 deletion packages/global/core/ai/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const defaultVectorModels: VectorModelItemType[] = [
name: 'Embedding-2',
price: 0,
defaultToken: 500,
maxToken: 3000
maxToken: 3000,
weight: 100
}
];
1 change: 1 addition & 0 deletions packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ export type DatasetTrainingSchemaType = {
q: string;
a: string;
chunkIndex: number;
weight: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

Expand Down
5 changes: 3 additions & 2 deletions packages/global/core/module/template/system/contextExtract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ export const ContextExtractModule: FlowModuleTemplateType = {
type: FlowNodeInputTypeEnum.textarea,
valueType: ModuleIOValueTypeEnum.string,
label: '提取要求描述',
description: '给AI一些对应的背景知识或要求描述,引导AI更好的完成任务',
description:
'给AI一些对应的背景知识或要求描述,引导AI更好的完成任务。\n该输入框可使用全局变量。',
required: true,
placeholder:
'例如: \n1. 你是一个实验室预约助手,你的任务是帮助用户预约实验室。\n2. 你是谷歌搜索助手,需要从文本中提取出合适的搜索词。',
'例如: \n1. 当前时间为: {{cTime}}。你是一个实验室预约助手,你的任务是帮助用户预约实验室,从文本中获取对应的预约信息。\n2. 你是谷歌搜索助手,需要从文本中提取出合适的搜索词。',
showTargetInApp: true,
showTargetInPlugin: true
},
Expand Down
5 changes: 3 additions & 2 deletions packages/global/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
"name": "@fastgpt/global",
"version": "1.0.0",
"dependencies": {
"axios": "^1.5.1",
"dayjs": "^1.11.7",
"openai": "4.23.0",
"encoding": "^0.1.13",
"js-tiktoken": "^1.0.7",
"axios": "^1.5.1",
"openai": "4.23.0",
"pdfjs-dist": "^4.0.269",
"timezones-list": "^3.0.2"
},
"devDependencies": {
Expand Down
Loading

0 comments on commit 759a233

Please sign in to comment.