Skip to content

Commit

Permalink
feat: 通义千问支持流式输出,另外追加了 阿里云百炼支持的几个其他公司的大模型(百川、月之暗面、Yi)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Aug 8, 2024
1 parent 9a7fc2a commit 7b8a9f2
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 17 deletions.
3 changes: 2 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@
"enable_search": true,
"max_tokens": 1024,
"history_enable": true,
"history_max_len": 300
"history_max_len": 300,
"stream": true
},
"tongyixingchen": {
"access_token": "此处填写你的密钥",
Expand Down
3 changes: 2 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@
"enable_search": true,
"max_tokens": 1024,
"history_enable": true,
"history_max_len": 300
"history_max_len": 300,
"stream": true
},
"tongyixingchen": {
"access_token": "此处填写你的密钥",
Expand Down
42 changes: 40 additions & 2 deletions utils/gpt_model/tongyi.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@ def __init__(self, data):
except Exception as e:
logger.error(traceback.format_exc())

def get_resp(self, prompt):
def get_resp(self, prompt, stream=False):
"""请求对应接口,获取返回值
Args:
prompt (str): 你的提问
stream (bool, optional): 是否流式返回. Defaults to False.
Returns:
str: 返回的文本回答
Expand All @@ -71,7 +72,7 @@ def get_resp(self, prompt):
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role

if self.config_data['history_enable'] == False:
if self.config_data['history_enable'] is False:
# 预设不能为空
if self.config_data["preset"] == "":
self.config_data["preset"] = "请做为一个人工智能,回答我的问题"
Expand All @@ -93,7 +94,16 @@ def get_resp(self, prompt):
top_k=self.config_data['top_k'],
enable_search=self.config_data['enable_search'],
max_tokens=self.config_data['max_tokens'],
stream=stream,
)

if response is None:
return None

if stream:
# 返回响应
return response

if response.status_code == HTTPStatus.OK:
logger.debug(response)

Expand Down Expand Up @@ -121,6 +131,34 @@ def get_resp(self, prompt):
logger.error(traceback.format_exc())
return None

# Store one prompt/answer exchange into the rolling chat history (context memory).
def add_assistant_msg_to_session(self, prompt, message):
    """Append the latest Q&A pair to ``self.history`` for context memory.

    Args:
        prompt (str): the question that was just sent to the model.
        message (str): the assistant's full reply text.

    Returns:
        dict: ``{"ret": True}`` when the pair was processed under the
        ``"api"`` backend; ``{"ret": False}`` for other backend types or
        when an exception occurred.
    """
    try:
        if self.config_data["type"] != "api":
            return {"ret": False}

        from dashscope.api_entities.dashscope_response import Role

        # Only remember the exchange when the history feature is switched on.
        if self.config_data['history_enable']:
            self.history.append({'role': Role.USER, 'content': prompt})
            self.history.append({'role': Role.ASSISTANT, 'content': message})

            budget = int(self.config_data["history_max_len"])
            while True:
                # Total characters stored across every history entry.
                used = sum(len(entry['content']) for entry in self.history if 'content' in entry)
                if used <= budget:
                    break
                # Over budget: discard the oldest user/assistant pair and re-check.
                self.history.pop(0)
                self.history.pop(0)

        logger.debug(f"history={self.history}")

        return {"ret": True}
    except Exception:
        logger.error(traceback.format_exc())
        return {"ret": False}

if __name__ == '__main__':
# 配置日志输出格式
Expand Down
36 changes: 28 additions & 8 deletions utils/my_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,7 @@ def llm_stream_handle_and_audio_synthesis(self, chat_type, data, type="chat", we
chat_model_methods = {
"chatgpt": lambda: self.chatgpt.get_gpt_resp(data["username"], data["content"], stream=True),
"zhipu": lambda: self.zhipu.get_resp(data["content"], stream=True),
"tongyi": lambda: self.tongyi.get_resp(data["content"], stream=True),
}
elif type == "vision":
pass
Expand All @@ -1628,21 +1629,35 @@ def split_by_chinese_punctuation(s):

if resp is not None:
tmp = ""
# 已经切掉的字符长度,针对一些特殊llm的流式输出,需要去掉前面的字符
cut_len = 0
for chunk in resp:
# logger.warning(chunk)
if chat_type in ["chatgpt", "zhipu"]:
# 流式的内容是追加形式的
tmp += chunk.choices[0].delta.content
resp_content += chunk.choices[0].delta.content
elif chat_type in ["tongyi"]:
# 这个是一直输出全部的内容,所以要切分掉已经处理的文本长度
tmp = chunk.output.choices[0].message.content[cut_len:]
resp_content = chunk.output.choices[0].message.content


# 用于切分,根据中文标点符号切分语句
resp_json = split_by_chinese_punctuation(tmp)
if resp_json["ret"]:
# 切出来的句子
tmp_content = resp_json["content1"]
# 标点符号后的内容包留,用于之后继续追加内容
tmp = resp_json["content2"]

logger.warning(f"句子生成:{tmp_content}")


if chat_type in ["chatgpt", "zhipu"]:
# 标点符号后的内容包留,用于之后继续追加内容
tmp = resp_json["content2"]
elif chat_type in ["tongyi"]:
# 记录 并追加切出的文本长度
cut_len += len(tmp_content)

"""
双重过滤,为您保驾护航
"""
Expand Down Expand Up @@ -1698,11 +1713,15 @@ def split_by_chinese_punctuation(s):

self.audio_synthesis_handle(message)

# logger.info(chunk)
if chunk.choices[0].finish_reason == "stop":
logger.info("流式接收完毕")
break

if chat_type in ["chatgpt", "zhipu"]:
# logger.info(chunk)
if chunk.choices[0].finish_reason == "stop":
logger.info("流式接收完毕")
break
elif chat_type in ["tongyi"]:
if chunk.output.choices[0].finish_reason == "stop":
logger.info("流式接收完毕")
break

# 返回为空,触发异常报警
else:
Expand All @@ -1720,6 +1739,7 @@ def split_by_chinese_punctuation(s):
chat_model_methods = {
"chatgpt": lambda: self.chatgpt.add_assistant_msg_to_session(data["username"], resp_content),
"zhipu": lambda: self.zhipu.add_assistant_msg_to_session(content_bak, resp_content),
"tongyi": lambda: self.tongyi.add_assistant_msg_to_session(content_bak, resp_content),
}
elif type == "vision":
pass
Expand Down
29 changes: 24 additions & 5 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2076,6 +2076,7 @@ def common_textarea_handle(content):
config_data["tongyi"]["enable_search"] = switch_tongyi_enable_search.value
config_data["tongyi"]["history_enable"] = switch_tongyi_history_enable.value
config_data["tongyi"]["history_max_len"] = int(input_tongyi_history_max_len.value)
config_data["tongyi"]["stream"] = switch_tongyi_stream.value

if config.get("webui", "show_card", "llm", "tongyixingchen"):
config_data["tongyixingchen"]["access_token"] = input_tongyixingchen_access_token.value
Expand Down Expand Up @@ -2942,7 +2943,7 @@ def save_config():
'qanything': 'QAnything',
'koboldcpp': 'koboldcpp',
'anythingllm': 'AnythingLLM',
'tongyi': '通义千问',
'tongyi': '通义千问/阿里云百炼',
'gpt4free': 'GPT4Free',
'dify': 'Dify',
'llm_tpu': 'LLM_TPU',
Expand Down Expand Up @@ -4487,7 +4488,7 @@ def anythingllm_get_workspaces_list():

if config.get("webui", "show_card", "llm", "tongyi"):
with ui.card().style(card_css):
ui.label("通义千问")
ui.label("通义千问/阿里云百炼")
with ui.row():
lines = ['web', 'api']
data_json = {}
Expand All @@ -4501,14 +4502,31 @@ def anythingllm_get_workspaces_list():
input_tongyi_cookie_path = ui.input(label='cookie路径', placeholder='web类型下,通义千问登录后,通过浏览器插件Cookie Editor获取Cookie JSON串,然后将数据保存在这个路径的文件中', value=config.get("tongyi", "cookie_path"))
input_tongyi_cookie_path.style("width:400px")
with ui.row():
lines = ['qwen-turbo', 'qwen-plus', 'qwen-max']
lines = [
'qwen-turbo',
'qwen-plus',
'qwen-long',
'qwen-max-longcontext',
'qwen-max',
'qwen-max-0428',
'baichuan2-turbo',
'moonshot-v1-8k',
'moonshot-v1-32k',
'moonshot-v1-128k',
'yi-large',
'yi-large-turbo',
'yi-medium',
]
data_json = {}
for line in lines:
data_json[line] = line
select_tongyi_model = ui.select(
label='类型',
options=data_json,
value=config.get("tongyi", "model")
value=config.get("tongyi", "model"),
with_input=True,
new_value_mode='add-unique',
clearable=True
).style("width:150px")
input_tongyi_api_key = ui.input(label='密钥', value=config.get("tongyi", "api_key"), placeholder='API类型下,DashScope平台申请的API密钥')
input_tongyi_preset = ui.input(label='预设', placeholder='API类型下,用于指定一组预定义的设置,以便模型更好地适应特定的对话场景。', value=config.get("tongyi", "preset")).style("width:500px")
Expand All @@ -4520,7 +4538,8 @@ def anythingllm_get_workspaces_list():
with ui.row():
switch_tongyi_history_enable = ui.switch('上下文记忆', value=config.get("tongyi", "history_enable")).style(switch_internal_css)
input_tongyi_history_max_len = ui.input(label='最大记忆长度', value=config.get("tongyi", "history_max_len"), placeholder='最长能记忆的问答字符串长度,超长会丢弃最早记忆的内容,请慎用!配置过大可能会有丢大米')

switch_tongyi_stream = ui.switch('流式输出', value=config.get("tongyi", "stream")).tooltip("是否开启流式输出,开启后,回答会逐句输出,关闭后,回答会一次性输出。")

if config.get("webui", "show_card", "llm", "gpt4free"):
with ui.card().style(card_css):
ui.label("GPT4Free")
Expand Down

0 comments on commit 7b8a9f2

Please sign in to comment.