
Commit

README update
LovebuildJ committed Sep 26, 2020
1 parent 6ea1b46 commit 490d7b2
Showing 4 changed files with 57 additions and 30 deletions.
24 changes: 20 additions & 4 deletions README.md
@@ -1,5 +1,5 @@
# language-ai
AI article paraphrasing (pseudo-original), automatic article generation, NLP, natural language processing. The first open-source AI paraphrasing application project on the web. The project is under active development; watch for the core release.
AI article paraphrasing (pseudo-original), automatic article generation, NLP, natural language processing, DNN language model, word-sense similarity analysis. The first open-source AI paraphrasing application project on the web.

## Quick start
1. Environment: `JDK1.8`, `maven3.6+`, `redis`
@@ -31,7 +31,7 @@ baidu:
}
```

The remaining parameters are not signature-checked yet; pass empty strings by default.
The test build performs no validation; all parameters can be left empty by default.

## Source directory overview
```
@@ -68,6 +68,17 @@ language-ai
- Download: https://www.ltp-cloud.com/download#down_cilin
- The project ships with a built-in lexicon (downloaded from CSDN)
#### Extending the lexicon
> For more accurate similarity calculation and replacement you need a large, precise lexicon, which you can gradually build up yourself.
> Just add entries to `resource/res/word.txt` following the existing format, then call the Redis initialization endpoint.
Redis initialization endpoint: `/ai/command/initRedis`
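
For a quick way to trigger the initialization from code, here is a hedged sketch using Spring's `RestTemplate`; the host, port, and HTTP method are assumptions (check the actual controller mapping), not something defined in this README.

```java
import org.springframework.web.client.RestTemplate;

public class InitRedisExample {
    public static void main(String[] args) {
        // Assumption: the service runs locally on port 8080 and the endpoint accepts GET.
        RestTemplate rest = new RestTemplate();
        String result = rest.getForObject("http://localhost:8080/ai/command/initRedis", String.class);
        System.out.println(result);
    }
}
```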
#### About duplicate words in the lexicon
There is no need to worry about this; the author has done a lot of optimization here. Word groups that share the same key are all stored in Redis under indexed keys of the form Key0, Key1, and so on.
At query time, every group with the same key is fetched and de-duplicated before further operations such as computing word-sense similarity. To keep lookups fast, only the first 20 groups for a given key are used by default.
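
Below is a minimal sketch of that Key0/Key1 storage scheme and the de-duplicated lookup, assuming Spring Data Redis's `StringRedisTemplate` and a comma-separated group format; the class and field names are illustrative, not the project's actual implementation.

```java
import org.springframework.data.redis.core.StringRedisTemplate;

import java.util.LinkedHashSet;
import java.util.Set;

public class SynonymStore {

    private static final int MAX_GROUPS = 20; // only the first 20 groups per key are read

    private final StringRedisTemplate redis;

    public SynonymStore(StringRedisTemplate redis) {
        this.redis = redis;
    }

    /** Store a synonym group under the next free indexed key, e.g. Key0, Key1, ... */
    public void saveGroup(String word, String group) {
        for (int i = 0; ; i++) {
            String key = word + i;
            if (redis.opsForValue().get(key) == null) {
                redis.opsForValue().set(key, group);
                return;
            }
        }
    }

    /** Fetch at most the first 20 groups for a word and de-duplicate the merged synonyms. */
    public Set<String> loadSynonyms(String word) {
        Set<String> synonyms = new LinkedHashSet<>();
        for (int i = 0; i < MAX_GROUPS; i++) {
            String group = redis.opsForValue().get(word + i);
            if (group == null) {
                break;
            }
            for (String s : group.split(",")) {
                synonyms.add(s.trim()); // LinkedHashSet drops duplicates, keeps insertion order
            }
        }
        return synonyms;
    }
}
```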
## Technology map
This project combines several excellent NLP projects. Word segmentation uses the lexical analysis in Baidu AI's natural language processing service, and word-sense similarity is computed with HanLP's synonym distance.
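
As a hedged illustration of the HanLP side (not code from this project; the class and method names are as I recall them from HanLP 1.x and may differ in other versions):

```java
import com.hankcs.hanlp.dictionary.CoreSynonymDictionary;

public class SynonymDistanceExample {
    public static void main(String[] args) {
        // distance(): smaller values mean closer meanings; similarity(): higher means more similar.
        System.out.println(CoreSynonymDictionary.distance("香蕉", "苹果"));
        System.out.println(CoreSynonymDictionary.similarity("香蕉", "苹果"));
    }
}
```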
@@ -90,8 +101,8 @@ HanLP is an NLP toolkit made up of a series of models and algorithms, aiming to popularize natural lang…
##### Backend
- SpringBoot, simple configuration, rapid development
- MyBatis, complex data operations
- Spring Data Jpa, simple data operations
- MyBatis, complex data operations (the lightweight build needs no database, for more flexibility)
- Spring Data Jpa, simple data operations (the lightweight build needs no database, for more flexibility)
- SwaggerUI + BootstrapSwaggerUI, online API documentation, enhanced styling, exportable API docs
- Redis for data storage and caching
- Async multithreading to speed up article splitting and replacement (the gain may be modest on a single-core CPU); see the async configuration sketch below
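
A minimal sketch of what enabling Spring's async support with a bounded pool can look like; the pool sizes are assumptions for illustration, not this project's actual configuration:

```java
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;

@Configuration
@EnableAsync
public class AsyncConfig {

    @Bean
    public Executor taskExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(8);       // illustrative values only
        executor.setMaxPoolSize(35);       // mirrors the "at most 35 threads" note in LexerService
        executor.setQueueCapacity(100);
        executor.setThreadNamePrefix("ai-replace-");
        executor.initialize();
        return executor;
    }
}
```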
@@ -107,6 +118,11 @@ HanLP is an NLP toolkit made up of a series of models and algorithms, aiming to popularize natural lang…
> - Q: When the text is too long, the Baidu AI API throws an exception.
> - A: Do the text slicing on the client or the server side.
> - Q: What if a user submits a whole article of several thousand characters?
> - A: Slice it by text size and process the slices with async multithreading to improve performance.
> - Q: Pasting an article directly for AI paraphrasing may raise a JSON injection error.
> - A: Strip spaces, line breaks, and the like, or replace them with escape characters (see the sanitizing sketch below).
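
Here is a hedged sketch of that kind of sanitizing step before the text is embedded in a hand-built JSON request body; it is plain-Java escaping for illustration, not the project's own handling:

```java
public class TextSanitizer {

    /** Remove line breaks and escape characters that would break a hand-built JSON string. */
    public static String sanitize(String text) {
        if (text == null) {
            return "";
        }
        return text
                .replace("\\", "\\\\")   // escape backslashes first
                .replace("\"", "\\\"")   // escape double quotes
                .replace("\r", "")        // drop carriage returns
                .replace("\n", "")        // drop line feeds
                .trim();
    }

    public static void main(String[] args) {
        System.out.println(sanitize("第一段。\n第二段,含\"引号\"。"));
    }
}
```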
## Other notes
Why use several NLP projects? The original plan was to build the whole thing on Baidu AI alone, but because the Baidu natural language processing API…
24 changes: 9 additions & 15 deletions src/main/java/com/chenxin/controller/LexerController.java
@@ -5,10 +5,7 @@
import com.chenxin.exception.BizException;
import com.chenxin.model.R;
import com.chenxin.model.ReqBody;
import com.chenxin.model.dto.DnnModelOut;
import com.chenxin.model.dto.LexerOut;
import com.chenxin.model.dto.SimilarWordDto;
import com.chenxin.model.dto.TextDto;
import com.chenxin.model.dto.*;
import com.chenxin.service.LexerService;
import com.chenxin.util.CommonEnum;
import com.chenxin.util.consts.AiConstant;
@@ -79,13 +76,8 @@ public R wordReplace(@RequestBody ReqBody<TextDto> para) {
}

// DNN language model check
DnnModelOut out = getPpl(lexerOut,accessToken);
if (out == null) {
// DNN computation failed, return the original text directly
return R.success(para.getParams().getText());
}

return R.success(out.getText()==null?para.getParams():out.getText());
ParagraphOut out = getPpl(lexerOut,accessToken);
return R.success(out);
}

@ApiOperation("Article AI paraphrasing")
@@ -102,20 +94,22 @@ public R articleReplace(@RequestBody ReqBody<TextDto> para) {
return R.error(CommonEnum.AI_ARTICLE_ERROR);
}

private DnnModelOut getPpl(LexerOut lexerOut,String accessToken) {
private ParagraphOut getPpl(LexerOut lexerOut,String accessToken) {
if (lexerOut == null) {
throw new BizException(CommonEnum.PARAM_ERROR);
}
if (StrUtil.isBlank(accessToken)) {
throw new BizException(CommonEnum.TOKEN_NOT_FOUND);
}

String replaceResult = lexerService.sliceSentence(lexerOut).getReplace();
ReplaceTextOut rto = lexerService.sliceSentence(lexerOut);
String replaceResult = rto.getReplace();
if (StrUtil.isNotBlank(replaceResult)) {
// Compute text fluency with the DNN language model
return lexerService.analyseDnnModel(new TextDto(replaceResult),accessToken);
DnnModelOut out = lexerService.analyseDnnModel(new TextDto(replaceResult),accessToken);
return new ParagraphOut(out.getText(),rto.getReplaceCount());
}

return null;
return new ParagraphOut(lexerOut.getText(),0);
}
}
37 changes: 27 additions & 10 deletions src/main/java/com/chenxin/service/LexerService.java
@@ -243,18 +243,36 @@ public ParagraphOut replaceParagraph(TextDto text,String accessToken) throws Exe
int threadCount = 0;
if (size>0) {
log.info("开启多线程, 批量分析, 检测, 替换...");
// 有多少个段落, 则开多少线程, 最多不能超过35个线程
for (int i = 0; i < size; i++) {
Future<ArticleReplaceOut> asyncResult = getReplaceResult(paragraphs.get(i),accessToken);
ArticleReplaceOut arOut =asyncResult.get();
if (size == 1) {
// 一个段落, 则分多个句子进行。 一个句子一个线程
String p = paragraphs.get(0);
String[] sentence = p.split(AiConstant.PERIOD);
for (String s : sentence) {
Future<ArticleReplaceOut> asyncResult = getReplaceResult(s,accessToken);
ArticleReplaceOut arOut =asyncResult.get();

sb.append(arOut.getReplaceText());

replaceTotal+=arOut.getReplaceCount();
if (asyncResult.isDone()) {
threadCount ++;
}
}
}else if (size>1) {
// 有多少个段落, 则开多少线程, 最多不能超过35个线程
for (int i = 0; i < size; i++) {
Future<ArticleReplaceOut> asyncResult = getReplaceResult(paragraphs.get(i),accessToken);
ArticleReplaceOut arOut =asyncResult.get();

sb.append(arOut.getReplaceText());
sb.append(arOut.getReplaceText());

replaceTotal+=arOut.getReplaceCount();
if (asyncResult.isDone()) {
threadCount ++;
replaceTotal+=arOut.getReplaceCount();
if (asyncResult.isDone()) {
threadCount ++;
}
}
}

}

while (threadCount < size) {
@@ -265,7 +283,7 @@ public ParagraphOut replaceParagraph(TextDto text,String accessToken) throws Exe
}

/**
* Asynchronous multithreaded processing
* Asynchronous multithreaded processing of a paragraph
*/
@Async
private Future<ArticleReplaceOut> getReplaceResult(String sentence,String accessToken) {
@@ -278,7 +296,6 @@ private Future<ArticleReplaceOut> getReplaceResult(String sentence,String access
// Analyze and replace sentence by sentence; the split sentences do not carry the trailing "。" by default
String[] sen = sentence.split(AiConstant.PERIOD);
for (String s : sen) {
ArticleReplaceOut arOut = new ArticleReplaceOut();
// Analyze word senses
LexerOut lo = analyseLexer(new TextDto(s),accessToken);
// Replacement result
2 changes: 1 addition & 1 deletion src/main/resources/static/js/vue.js
@@ -11247,7 +11247,7 @@
// OR when it is inside another scoped slot or v-for (the reactivity may be
// disconnected due to the intermediate scope variable)
// #9438, #9506
// TODO: this can be further optimized by properly analyzing in-scope bindings
// this can be further optimized by properly analyzing in-scope bindings
// and skip force updating ones that do not actually use scope variables.
if (!needsForceUpdate) {
var parent = el.parent;
