From ee2d996400cf246fbb2a16426ea0d0af1584d258 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 20 Dec 2023 10:15:34 +0800 Subject: [PATCH 01/24] feat:add gemini model --- README.md | 11 +++ adapters/adapter_factory.py | 3 + adapters/gemini_adapter.py | 177 ++++++++++++++++++++++++++++++++++++ test.py | 6 +- utils/http_util.py | 15 ++- 5 files changed, 206 insertions(+), 6 deletions(-) create mode 100644 adapters/gemini_adapter.py diff --git a/README.md b/README.md index 32814b0..da52478 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ - [ ] bingchat - [ ] 百度文心一言 - [x] 讯飞星火 + - [x] gemini - [ ] ... - [x] 支持stream方式调用 - [x] 支持open ai的第三方代理服务,比如openai-sb等 @@ -170,5 +171,15 @@ "router-round-robin": "7c7aa4a3549f12" } } + }, + { + "token": "gemini-7c7aa4a3549f5", + "type": "gemini", + "config": { + "api_key": "xxxxx", + "proxies": { + "https": "http://localhost:7890" + } + } } ] \ No newline at end of file diff --git a/adapters/adapter_factory.py b/adapters/adapter_factory.py index 2d80b3e..7ea214c 100644 --- a/adapters/adapter_factory.py +++ b/adapters/adapter_factory.py @@ -8,6 +8,7 @@ from adapters.xunfei_spark import XunfeiSparkAPIModel from adapters.router_adapter import RouterAdapter from adapters.model_name_router_adapter import ModelNameRouterAdapter +from adapters.gemini_adapter import GeminiAdapter model_instance_dict = {} @@ -45,6 +46,8 @@ def init_adapter(instanceKey: str, type: str, **kwargs) -> ModelAdapter: model = RouterAdapter(factory_method=get_adapter, **kwargs) elif type == "model-name-router": model = ModelNameRouterAdapter(factory_method=get_adapter, **kwargs) + elif type == "gemini": + model = GeminiAdapter(**kwargs) else: raise ValueError(f"unknown model type: {type}") except Exception as e: diff --git a/adapters/gemini_adapter.py b/adapters/gemini_adapter.py new file mode 100644 index 0000000..c230282 --- /dev/null +++ b/adapters/gemini_adapter.py @@ -0,0 +1,177 @@ +import json +import time +from typing import Dict, Iterator, List +import uuid +from adapters.base import ModelAdapter +from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse, ChatMessage +import requests +from utils.http_util import post, stream +from loguru import logger +from utils.util import num_tokens_from_string + +""" + curl -x http://127.0.0.1:7890 https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key= \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [ + {"role":"user", + "parts":[{ + "text": "你好"}]}, + {"role": "model", + "parts":[{ + "text": "你好"}]}, + {"role": "user", + "parts":[{ + "text": "你是谁?"}]}, + ] + }' + + +{ + "candidates": [ + { + "content": { + "parts": [ + { + "text": "In the tranquil village of Étoiles-sur-Mer, nestled amidst the rolling hills of 17th-century France, lived a young girl named Marie. She was known for her kind heart, inquisitive nature, and an extraordinary bond with a magical backpack she inherited from her grandmother." 
+ } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "promptFeedback": { + "safetyRatings": [ + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + } + ] + } +} +""" + + +class GeminiAdapter(ModelAdapter): + def __init__(self, **kwargs): + super().__init__() + self.api_key = kwargs.pop("api_key", None) + self.prompt = kwargs.pop( + "prompt", "You need to follow the system settings:{system}" + ) + self.proxies = kwargs.pop("proxies", None) + self.model = "gemini-pro" + self.config_args = kwargs + + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: + method = "generateContent" + headers = {"Content-Type": "application/json"} + # if request.stream: + # method = "streamGenerateContent" + url = ( + f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:{method}?key=" + + self.api_key + ) + params = self.convert_2_gemini_param(request) + response = post(url, headers=headers, proxies=self.proxies, params=params) + yield ChatCompletionResponse(**self.response_convert(response)) + + def response_convert(self, data): + completion = data["candidates"][0]["content"]["parts"][0]["text"] + completion_tokens = num_tokens_from_string(completion) + openai_response = { + "id": str(uuid.uuid1()), + "object": "chat.completion", + "created": int(time.time()), + "model": self.model, + "usage": { + "prompt_tokens": 0, + "completion_tokens": completion_tokens, + "total_tokens": completion_tokens, + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": completion, + }, + "index": 0, + "finish_reason": "stop", + } + ], + } + return openai_response + + """ + [ + {"role":"user", + "parts":[{ + "text": "你好"}]}, + {"role": "model", + "parts":[{ + "text": "你好"}]}, + {"role": "user", + "parts":[{ + "text": "你是谁?"}]}, + ] + """ + + def convert_messages_to_prompt( + self, messages: List[ChatMessage] + ) -> List[Dict[str, str]]: + prompt = [] + for message in messages: + role = message.role + if role in ["function"]: + raise Exception(f"不支持的功能:{role}") + if role == "system": # 将system转为user 这里可以使用 CharacterGLM + role = "user" + content = self.prompt.format(system=message.content) + prompt.append({"role": role, "parts": [{"text": content}]}) + prompt.append({"role": "model", "parts": [{"text": "ok"}]}) + elif role == "assistant": + prompt.append({"role": "model", "parts": [{"text": message.content}]}) + else: + content = message.content + prompt.append({"role": role, "parts": [{"text": content}]}) + return prompt + + def convert_2_gemini_param(self, request: ChatCompletionRequest): + contents = self.convert_messages_to_prompt(request.messages) + param = {"contents": contents} + return param diff --git a/test.py b/test.py index 004ef8b..88e4ba3 100644 --- a/test.py +++ b/test.py @@ -43,8 +43,8 @@ def multiple_messages_test(**kwargs): openai.api_key = api_key single_message_test() 
time.sleep(2) - # single_message_test(stream=True) - # time.sleep(2) - # multiple_messages_test() + single_message_test(stream=True) + time.sleep(2) + multiple_messages_test() # time.sleep(2) # multiple_messages_test(stream=True) diff --git a/utils/http_util.py b/utils/http_util.py index 566a6b3..d1dfd6d 100644 --- a/utils/http_util.py +++ b/utils/http_util.py @@ -5,11 +5,17 @@ api_timeout_seconds = 300 -def post(api_url, headers: dict, params: dict, timeout=api_timeout_seconds): +def post( + api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None +): resp = None try: resp = requests.post( - url=api_url, headers=headers, data=json.dumps(params), timeout=timeout + url=api_url, + headers=headers, + data=json.dumps(params), + timeout=timeout, + proxies=proxies, ) if requests.codes.ok != resp.status_code: logger.error(f"响应异常:{resp_text(resp)}") @@ -31,7 +37,9 @@ def resp_text(resp): return resp_str -def stream(api_url, headers: dict, params: dict, timeout=api_timeout_seconds): +def stream( + api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None +): resp = None try: resp = requests.post( @@ -41,6 +49,7 @@ def stream(api_url, headers: dict, params: dict, timeout=api_timeout_seconds): json=params, # data=json.dumps(params), timeout=timeout, + proxies=proxies, ) if requests.codes.ok != resp.status_code: logger.error(f"响应异常:{resp.text}") From 2b1741df8559103000d28cabb55357b1535c56c2 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Thu, 21 Dec 2023 15:18:07 +0800 Subject: [PATCH 02/24] fix:stream --- adapters/claude_web.py | 62 +++++++++++++++++++++++++++++--------- adapters/gemini_adapter.py | 32 +++++++++++++++++++- adapters/xunfei_spark.py | 2 +- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/adapters/claude_web.py b/adapters/claude_web.py index a005d7a..8c6b687 100644 --- a/adapters/claude_web.py +++ b/adapters/claude_web.py @@ -8,19 +8,21 @@ class ClaudeWebModel(ModelAdapter): - def __init__(self, **kwargs): self.cookie = kwargs.pop("cookie", None) self.proxies = kwargs.pop("proxies", None) self.client = ClaudeWebClient(self.cookie, proxies=self.proxies) - self.prompt = kwargs.pop("prompt", "The information in [] is the context of the conversation. \ + self.prompt = kwargs.pop( + "prompt", + "The information in [] is the context of the conversation. 
\ Please ignore the JSON format of the context \ - during the conversation and answer the user's latest conversation: {newMessage} \n {history}") + during the conversation and answer the user's latest conversation: {newMessage} \n {history}", + ) self.single_conversation = kwargs.pop("single_conversation", False) if self.single_conversation: self.conversation_id = kwargs.pop("conversation_id", None) if self.conversation_id is None: - self.conversation_id = self.client.create_new_chat()['uuid'] + self.conversation_id = self.client.create_new_chat()["uuid"] self.config_args = kwargs def convertOpenAIParams2ClaudePrompt(self, request: ChatCompletionRequest) -> str: @@ -28,25 +30,57 @@ def convertOpenAIParams2ClaudePrompt(self, request: ChatCompletionRequest) -> st if len(messages) < 2: return messages[0].content newMessage = messages[-1].content - history = [message.model_dump(exclude_none=True) - for message in messages[:len(messages)-1]] + history = [ + message.model_dump(exclude_none=True) + for message in messages[: len(messages) - 1] + ] return self.prompt.format(newMessage=newMessage, history=history) - def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: claudePrompt = self.convertOpenAIParams2ClaudePrompt(request) conversation_id = self.conversation_id if not self.single_conversation: - conversation_id = self.client.create_new_chat()['uuid'] + conversation_id = self.client.create_new_chat()["uuid"] response = self.client.send_message(claudePrompt, conversation_id) - resp = self.claude_to_openai_response(response) + if request.stream: # 假的stream + resp = self.claude_to_openai_stream_response(response) + else: + resp = self.claude_to_openai_response(response) logger.info( - f"ClaudeWebModel req:{request}, conversation_id:{conversation_id}, claudePrompt:{claudePrompt} ,resp:{resp}") + f"ClaudeWebModel req:{request}, conversation_id:{conversation_id}, claudePrompt:{claudePrompt} ,resp:{resp}" + ) yield ChatCompletionResponse(**resp) + def claude_to_openai_stream_response(self, completion: str): + completion_tokens = num_tokens_from_string(completion) + openai_response = { + "id": f"chatcmpl-{str(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": "claude-2", + "usage": { + "prompt_tokens": 0, + "completion_tokens": completion_tokens, + "total_tokens": completion_tokens, + }, + "choices": [ + { + "delta": { + "role": "assistant", + "content": completion, + }, + "index": 0, + "finish_reason": "stop", + } + ], + } + + return openai_response + def claude_to_openai_response(self, completion: str): - completion_tokens = num_tokens_from_string( - completion - ) + completion_tokens = num_tokens_from_string(completion) openai_response = { "id": f"chatcmpl-{str(time.time())}", "object": "chat.completion", @@ -64,7 +98,7 @@ def claude_to_openai_response(self, completion: str): "content": completion, }, "index": 0, - "finish_reason": "stop" + "finish_reason": "stop", } ], } diff --git a/adapters/gemini_adapter.py b/adapters/gemini_adapter.py index c230282..d2fbe68 100644 --- a/adapters/gemini_adapter.py +++ b/adapters/gemini_adapter.py @@ -109,7 +109,37 @@ def chat_completions( ) params = self.convert_2_gemini_param(request) response = post(url, headers=headers, proxies=self.proxies, params=params) - yield ChatCompletionResponse(**self.response_convert(response)) + if request.stream: # 假的stream + openai_response = 
self.response_convert_stream(response) + else: + openai_response = self.response_convert(response) + yield ChatCompletionResponse(**openai_response) + + def response_convert_stream(self, data): + completion = data["candidates"][0]["content"]["parts"][0]["text"] + completion_tokens = num_tokens_from_string(completion) + openai_response = { + "id": str(uuid.uuid1()), + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": self.model, + "usage": { + "prompt_tokens": 0, + "completion_tokens": completion_tokens, + "total_tokens": completion_tokens, + }, + "choices": [ + { + "delta": { + "role": "assistant", + "content": completion, + }, + "index": 0, + "finish_reason": "stop", + } + ], + } + return openai_response def response_convert(self, data): completion = data["candidates"][0]["content"]["parts"][0]["text"] diff --git a/adapters/xunfei_spark.py b/adapters/xunfei_spark.py index 981c92b..b1e0396 100644 --- a/adapters/xunfei_spark.py +++ b/adapters/xunfei_spark.py @@ -136,7 +136,7 @@ def client_response_to_chatgpt_response(self, iter_resp): }, "choices": [ { - "delta": { + "message": { "role": "assistant", "content": "".join(completions), }, From 62c525dd4188812467596dc868b7b63c49202949 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Mon, 29 Jan 2024 17:18:55 +0800 Subject: [PATCH 03/24] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index da52478..0330422 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ # openai-style-api + + ***欢迎有兴趣的大佬提PR*** + ## 用途 屏蔽不同大模型API的差异,统一用openai api标准格式使用大模型, 也可以用来做api-key的二次分发管理; 配置化管理不同大模型调用参数,让你在使用大模型的时候只需关注 api-key 和 messages @@ -182,4 +185,4 @@ } } } - ] \ No newline at end of file + ] From 412cd0129b0b1b2d58d6d93843a1091daee84005 Mon Sep 17 00:00:00 2001 From: Matrix42 Date: Wed, 31 Jan 2024 14:37:22 +0800 Subject: [PATCH 04/24] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=AE=AF=E9=A3=9E?= =?UTF-8?q?=E6=98=9F=E7=81=AB3.5=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- clients/xunfei_spark/api/spark_api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/xunfei_spark/api/spark_api.py b/clients/xunfei_spark/api/spark_api.py index 41ae9ca..e1b06be 100644 --- a/clients/xunfei_spark/api/spark_api.py +++ b/clients/xunfei_spark/api/spark_api.py @@ -27,6 +27,9 @@ "v3.0": { "domain": "generalv3", "url": "wss://spark-api.xf-yun.com/v3.1/chat", + },"v3.5": { + "domain": "generalv3.5", + "url": "wss://spark-api.xf-yun.com/v3.5/chat", }, } From b8806fb098ec7065ef2245c2fc9f2dfe9238ddd4 Mon Sep 17 00:00:00 2001 From: Matrix42 Date: Thu, 1 Feb 2024 17:15:34 +0800 Subject: [PATCH 05/24] Update spark_api.py --- clients/xunfei_spark/api/spark_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/xunfei_spark/api/spark_api.py b/clients/xunfei_spark/api/spark_api.py index e1b06be..d3fe083 100644 --- a/clients/xunfei_spark/api/spark_api.py +++ b/clients/xunfei_spark/api/spark_api.py @@ -27,7 +27,8 @@ "v3.0": { "domain": "generalv3", "url": "wss://spark-api.xf-yun.com/v3.1/chat", - },"v3.5": { + }, + "v3.5": { "domain": "generalv3.5", "url": "wss://spark-api.xf-yun.com/v3.5/chat", }, From bc11a044d9ea27bc4ab50585837c6370e4036b5a Mon Sep 17 00:00:00 2001 From: tianminghui Date: Sun, 4 Feb 2024 15:02:11 +0800 Subject: [PATCH 06/24] feat: sydney --- .gitignore | 3 +- .vscode/settings.json | 7 +- README.md | 19 +- README_EN.md | 143 ------- adapters/adapter_factory.py | 3 + 
adapters/base.py | 65 ++- adapters/bing_sydney.py | 69 +++ clients/__init__.py | 0 clients/sydney/__init__.py | 1 + clients/sydney/constants.py | 52 +++ clients/sydney/enums.py | 173 ++++++++ clients/sydney/exceptions.py | 34 ++ clients/sydney/sydney.py | 797 +++++++++++++++++++++++++++++++++++ clients/sydney/utils.py | 28 ++ model-config.template | 8 + tests/sydney_test.py | 32 ++ test.py => tests/test.py | 8 +- 17 files changed, 1282 insertions(+), 160 deletions(-) delete mode 100644 README_EN.md create mode 100644 adapters/bing_sydney.py create mode 100644 clients/__init__.py create mode 100644 clients/sydney/__init__.py create mode 100644 clients/sydney/constants.py create mode 100644 clients/sydney/enums.py create mode 100644 clients/sydney/exceptions.py create mode 100644 clients/sydney/sydney.py create mode 100644 clients/sydney/utils.py create mode 100644 tests/sydney_test.py rename test.py => tests/test.py (95%) diff --git a/.gitignore b/.gitignore index 284fe04..b73b35c 100644 --- a/.gitignore +++ b/.gitignore @@ -159,4 +159,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -model-config.json \ No newline at end of file +model-config.json +*.ini \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index d99f2f3..047fb19 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,5 +2,10 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter" }, - "python.formatting.provider": "none" + "python.formatting.provider": "none", + "python.testing.pytestArgs": [ + "tests", + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } \ No newline at end of file diff --git a/README.md b/README.md index 0330422..4550f3e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -
-    中文 | English
-</div>
# openai-style-api @@ -10,7 +7,6 @@ ## 用途 屏蔽不同大模型API的差异,统一用openai api标准格式使用大模型, 也可以用来做api-key的二次分发管理; 配置化管理不同大模型调用参数,让你在使用大模型的时候只需关注 api-key 和 messages - ***README_EN.md may not have been updated in a timely manner*** ## 功能 - [x] 支持多种大模型,当前已支持 @@ -19,7 +15,7 @@ - [x] claude-api 【api申请在等待列表,暂未测试】 - [x] claude-web (将web端功能封装成openai api) - [x] 智谱ai - - [ ] bingchat + - [x] bingchat(copilot) - [ ] 百度文心一言 - [x] 讯飞星火 - [x] gemini @@ -184,5 +180,18 @@ "https": "http://localhost:7890" } } + }, + { + "token": "bing-7c7aa4a3549f5", + "type": "bing-sydney", + "config": { + "cookie": "xxxxx", + "style": "balanced" + } } ] + + +## 项目部分代码来自于以下开源项目,感谢🙏 +https://github.com/vsakkas/sydney.py +https://github.com/suqingdong/sparkapi \ No newline at end of file diff --git a/README_EN.md b/README_EN.md deleted file mode 100644 index 036c05c..0000000 --- a/README_EN.md +++ /dev/null @@ -1,143 +0,0 @@ -
-    中文 | English
-</div>
- -# openai-style-api - -## Purpose -Shield the differences between different large model APIs and use large models in a unified openai API standard format; Manage different large model call parameters in a configurable way, so that you only need to care about api-key and messages when using large models. - -## Features - - -- Support multiple large models, currently supported - - [x] openai - - [x] azure open ai - - [x] claude-api 【api application is on the waiting list, not tested yet】 - - [x] claude-web (encapsulate web functions into openai api) - - [x] 智谱ai -- Support stream mode calling -- Support third-party proxy services for open ai, such as openai-sb - -## TODO - -- [ ] Configuration update interface -- [ ] Support more large models - - [ ] bingchat - - [x] 智谱ai - - [ ] 百度文心一言 - - [ ] 讯飞星火 - - [ ] ... - -## Quick start - -1. Clone the project -2. `cp model-config.template model-config.json` and modify the configuration file model-config.json as needed - - { - "token": "f2b7295fc440db7f", - "type": "azure", - "config": { - "api_base": "https://xxxx.openai.azure.com/", - "deployment_id": "xxxx", - "api_version": "2023-05-15", - "api_key": "xxxx", - "temperature": 0.8 - } - } - -3. For local deployment, run `pip install -r requirements.txt` and then run `python open-api.py`. For docker deployment, execute `docker compose up -d` in the directory. -4. With api-base: localhost:8090 and api-key: f2b7295fc440db7f, you can start using it. Here are some examples: - -## Usage - -### curl - - curl http://localhost:8090/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer f2b7295fc440db7f" \ - -d '{ - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, - { - "role": "user", - "content": "Hello!" - } - ] - }' - -### Call with openai library - - import openai - - openai.api_key = "f2b7295fc440db7f" - openai.api_base = "http://localhost:8090/v1" - - completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}]) - print(completion.choices[0].message.content) - -### Third party applications - -[ChatGPT Next Web](https://github.com/Yidadaa/ChatGPT-Next-Web) -![Alt text](img/image.png) - - -## config example - [ - { - "token": "f2b7295fc440db7f", - "type": "azure", - "config": { - "api_base": "https://xxxx.openai.azure.com/", - "deployment_id": "gpt-35-turbo", - "api_version": "2023-05-15", - "api_key": "xxxxxx", - "temperature": 0.8 - } - }, - { - "token": "GxqT3BlbkFJj", - "type": "openai", - "config": { - "api_base": "https://api.openai.com/v1/", - "api_key": "sk-xxxxxx", - "model": "gpt-3.5-turbo" - } - }, - { - "token": "sb-ede1529390cc", - "type": "proxy", - "config": { - "api_base": "https://api.openai-sb.com/v1/", - "api_key": "sb-xxxxxx", - "model": "gpt-3.5-turbo" - } - }, - { - "token": "c115c8f5082", - "type": "claude-web", - "config": { - "cookie": "xxxxxx", - "proxies": { - "https": "http://localhost:7890" - }, - "conversation_id": "xxxxxx", - "prompt": "The information in [] is the context of the conversation. 
Please ignore the JSON format of the context during the conversation and answer the user's latest conversation: {newMessage} \n {history}", - "single_conversation": true - } - }, - { - "token": "7c7aa4a3549f5", - "type": "zhipu-api", - "config": { - "api_key": "xxxxxx", - "model": "chatglm_lite", - "temperature": 0.8, - "top_p": 0.7 - } - } -] \ No newline at end of file diff --git a/adapters/adapter_factory.py b/adapters/adapter_factory.py index 7ea214c..957b48e 100644 --- a/adapters/adapter_factory.py +++ b/adapters/adapter_factory.py @@ -9,6 +9,7 @@ from adapters.router_adapter import RouterAdapter from adapters.model_name_router_adapter import ModelNameRouterAdapter from adapters.gemini_adapter import GeminiAdapter +from adapters.bing_sydney import BingSydneyModel model_instance_dict = {} @@ -48,6 +49,8 @@ def init_adapter(instanceKey: str, type: str, **kwargs) -> ModelAdapter: model = ModelNameRouterAdapter(factory_method=get_adapter, **kwargs) elif type == "gemini": model = GeminiAdapter(**kwargs) + elif type == "bing-sydney": + model = BingSydneyModel(**kwargs) else: raise ValueError(f"unknown model type: {type}") except Exception as e: diff --git a/adapters/base.py b/adapters/base.py index 53664e9..e17a1a3 100644 --- a/adapters/base.py +++ b/adapters/base.py @@ -1,17 +1,70 @@ - - +import time from typing import Union, Iterator from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse +from utils.util import num_tokens_from_string class ModelAdapter: - def __init__(self, **kwargs): pass - def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: """ - 返回一个迭代器对象 - stream为false 第一个就是结果 + 返回一个迭代器对象 + stream为false 第一个就是结果 """ pass + + def completion_to_openai_response(self, completion: str, model: str = "default"): + completion_tokens = num_tokens_from_string(completion) + openai_response = { + "id": f"chatcmpl-{str(time.time())}", + "object": "chat.completion", + "created": int(time.time()), + "model": model, + "usage": { + "prompt_tokens": 0, + "completion_tokens": completion_tokens, + "total_tokens": completion_tokens, + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": completion, + }, + "index": 0, + "finish_reason": "stop", + } + ], + } + return openai_response + + def completion_to_openai_stream_response( + self, completion: str, model: str = "default" + ): + completion_tokens = num_tokens_from_string(completion) + openai_response = { + "id": f"chatcmpl-{str(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "usage": { + "prompt_tokens": 0, + "completion_tokens": completion_tokens, + "total_tokens": completion_tokens, + }, + "choices": [ + { + "delta": { + "role": "assistant", + "content": completion, + }, + "index": 0, + "finish_reason": "stop", + } + ], + } + return openai_response diff --git a/adapters/bing_sydney.py b/adapters/bing_sydney.py new file mode 100644 index 0000000..3ae9e16 --- /dev/null +++ b/adapters/bing_sydney.py @@ -0,0 +1,69 @@ +import asyncio +from typing import Iterator +from adapters.base import ModelAdapter +from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse +from clients.sydney import SydneyClient +from loguru import logger + + +class BingSydneyModel(ModelAdapter): + def __init__(self, **kwargs): + self.cookie = kwargs.pop("cookie") + self.style = kwargs.pop("style") + + self.proxy = 
kwargs.pop("proxy", None) + # 没找到合适的prompt 此prompt返回结果 My mistake, I can’t give a response to that right now. Let’s try a different topic. + # self.prompt = kwargs.pop( + # "prompt", + # "The information in [] is the context of the conversation. \ + # Please ignore the JSON format of the context \ + # during the conversation and answer the user's latest conversation: {newMessage} \n {history}", + # ) + self.config_args = kwargs + + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: + """ + 返回一个迭代器对象 + stream为false 第一个就是结果 + """ + + if request.stream: + result = asyncio.run(self.__chat_stream_help(request)) + for item in result: + logger.info(item) + yield ChatCompletionResponse( + **self.completion_to_openai_stream_response(item, request.model) + ) + else: + async_gen = self.__chat_help(request) + result = asyncio.run(async_gen) + logger.info(result) + yield ChatCompletionResponse( + **self.completion_to_openai_response(result, request.model) + ) + + def convertOpenAIParams2Prompt(self, request: ChatCompletionRequest) -> str: + messages = request.messages + if len(messages) < 2: + return messages[0].content + # 暂不支持 历史message, 默认取最近的message + msg = messages[-1].content + logger.warning(f"暂不支持对话历史,取最近一条对话记录:{msg}") + return msg + + async def __chat_help(self, request: ChatCompletionRequest): + prompt = self.convertOpenAIParams2Prompt(request) + logger.info("prompt:{}".format(prompt)) + async with SydneyClient(self.style, self.cookie, self.proxy) as client: + completion = await client.ask(prompt) + return completion + + async def __chat_stream_help(self, request: ChatCompletionRequest): + prompt = self.convertOpenAIParams2Prompt(request) + logger.info("prompt:{}".format(prompt)) + async with SydneyClient(self.style, self.cookie, self.proxy) as client: + return [ + response_token async for response_token in client.ask_stream(prompt) + ] diff --git a/clients/__init__.py b/clients/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/sydney/__init__.py b/clients/sydney/__init__.py new file mode 100644 index 0000000..360f973 --- /dev/null +++ b/clients/sydney/__init__.py @@ -0,0 +1 @@ +from .sydney import SydneyClient diff --git a/clients/sydney/constants.py b/clients/sydney/constants.py new file mode 100644 index 0000000..a16d7bf --- /dev/null +++ b/clients/sydney/constants.py @@ -0,0 +1,52 @@ +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.91" + +CREATE_HEADERS = { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.9", + "Referer": "https://copilot.microsoft.com/", + "Sec-Ch-Ua": '"Microsoft Edge";v="120", "Chromium";v="120", "Not?A_Brand";v="8"', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "Windows", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "User-Agent": USER_AGENT, + "X-Edge-Shopping-Flag": "0", +} + +CHATHUB_HEADERS = { + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.9", + "Cache-Control": "no-cache", + "Connection": "Upgrade", + "Origin": "https://copilot.microsoft.com", + "Pragma": "no-cache", + "User-Agent": USER_AGENT, +} + +KBLOB_HEADERS = { + "Accept": "image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.5", + "Content-Type": "multipart/form-data", + "Referer": "https://copilot.microsoft.com/", 
+ "Sec-Ch-Ua": '"Microsoft Edge";v="120", "Chromium";v="120", "Not?A_Brand";v="8"', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "Windows", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "User-Agent": USER_AGENT, + "X-Edge-Shopping-Flag": "0", +} + +BUNDLE_VERSION = "1.1381.12" + +BING_CREATE_CONVERSATION_URL = f"https://edgeservices.bing.com/edgesvc/turing/conversation/create?bundleVersion={BUNDLE_VERSION}" +BING_GET_CONVERSATIONS_URL = "https://copilot.microsoft.com/turing/conversation/chats" +BING_CHATHUB_URL = "wss://sydney.bing.com/sydney/ChatHub" +BING_KBLOB_URL = "https://copilot.microsoft.com/images/kblob" +BING_BLOB_URL = "https://copilot.microsoft.com/images/blob?bcid=" + +DELIMETER = "\x1e" # Record separator character. diff --git a/clients/sydney/enums.py b/clients/sydney/enums.py new file mode 100644 index 0000000..2ab92db --- /dev/null +++ b/clients/sydney/enums.py @@ -0,0 +1,173 @@ +from enum import Enum + + +class ConversationStyle(Enum): + """ + Copilot conversation styles. Supported options are: + - `creative` for original and imaginative chat + - `balanced` for informative and friendly chat + - `precise` for concise and straightforward chat + """ + + CREATIVE = "Creative" + BALANCED = "Balanced" + PRECISE = "Precise" + + +class ConversationStyleOptionSets(Enum): + """ + Copilot conversation styles. Supported options are: + - `creative` for original and imaginative chat + - `balanced` for informative and friendly chat + - `precise` for concise and straightforward chat + """ + + CREATIVE = "h3imaginative,clgalileo,gencontentv3" + BALANCED = "galileo" + PRECISE = "h3precise,clgalileo" + + +class ConversationHistoryOptionsSets(Enum): + AUTOSAVE = "autosave" + SAVEMEM = "savemem" + UPROFUPD = "uprofupd" + UPROFGEN = "uprofgen" + + +class DefaultOptions(Enum): + """ + Options that are used in all API requests to Copilot. + """ + + NLU_DIRECT_RESPONSE_FILTER = "nlu_direct_response_filter" + DEEPLEO = "deepleo" + DISABLE_EMOJI_SPOKEN_TEXT = "disable_emoji_spoken_text" + RESPONSIBLE_AI_POLICY_235 = "responsible_ai_policy_235" + ENABLEMM = "enablemm" + DV3SUGG = "dv3sugg" + IYXAPBING = "iyxapbing" + IYCAPBING = "iycapbing" + SAHARAGENCONV5 = "saharagenconv5" + EREDIRECTURL = "eredirecturl" + + +class NoSearchOptions(Enum): + """ + Options that are used to disable search access. + """ + + NOSEARCHALL = "nosearchall" + + +class DefaultComposeOptions(Enum): + """ + Options that are used in all compose API requests to Copilot. + """ + + NLU_DIRECT_RESPONSE_FILTER = "nlu_direct_response_filter" + DEEPLEO = "deepleo" + ENABLE_DEBUG_COMMANDS = "enable_debug_commands" + DISABLE_EMOJI_SPOKEN_TEXT = "disable_emoji_spoken_text" + RESPONSIBLE_AI_POLICY_235 = "responsible_ai_policy_235" + ENABLEMM = "enablemm" + SOEDGECA = "soedgeca" + MAX_TURNS_5 = "max_turns_5" + + +class CookieOptions(Enum): + """ + Options that are used only when the user is logged in + and using cookies to use in requests to Copilot. + """ + + AUTOSAVE = "autosave" + + +class ComposeTone(Enum): + """ + Copilot compose tones. 
Supported options are: + - `professional` for formal conversations in a professional setting + - `casual` for informal conversations between friends or family members + - `enthusiastic` for conversations where the writer wants to convey excitement or passion + - `informational` for conversations where the writer wants to convey information or knowledge + - `funny` for conversations where the writer wants to be humorous or entertaining + """ + + PROFESSIONAL = "professional" + CASUAL = "casual" + ENTHUSIASTIC = "enthusiastic" + INFORMATIONAL = "informational" + FUNNY = "funny" + + +class ComposeFormat(Enum): + """ + Copilot compose formats. Supported options are: + - `paragraph` for longer messages that are composed of multiple sentences or paragraphs + - `email` for messages that are structured like emails, with a clear subject line and formal greeting and closing + - `blogpost` for messages that are structured like blog posts, with clear headings and subheadings and a more informal tone + - `ideas` for messages that are used to brainstorm or share ideas + """ + + PARAGRAPH = "paragraph" + EMAIL = "email" + BLOGPOST = "blog post" + IDEAS = "bullet point list" + + +class ComposeLength(Enum): + """ + Copilot compose lengths. Supported options are: + - `short` for messages that are only a few words or sentences long + - `medium` for messages that are a few paragraphs long + - `long` for messages that are several paragraphs or pages long + """ + + SHORT = "short" + MEDIUM = "medium" + LONG = "long" + + +class CustomComposeTone: + """ + Class to represent custom Copilot compose tones. + """ + + def __init__(self, value) -> None: + self.value = value + + +class MessageType(Enum): + """ + Allowed message types. + """ + + CHAT = "Chat" + ACTION_REQUEST = "ActionRequest" + ADS_QUERY = "AdsQuery" + CONFIRMATION_CARD = "ConfirmationCard" + CONTEXT = "Context" + DISENGAGED = "Disengaged" + INTERNAL_LOADER_MESSAGE = "InternalLoaderMessage" + INTERNAL_SEARCH_QUERY = "InternalSearchQuery" + INTERNAL_SEARCH_RESULT = "InternalSearchResult" + INVOKE_ACTION = "InvokeAction" + PROGRESS = "Progress" + RENDER_CARD_REQUEST = "RenderCardRequest" + RENDER_CONTENT_REQUEST = "RenderContentRequest" + SEMANTIC_SERP = "SemanticSerp" + GENERATE_CONTENT_QUERY = "GenerateContentQuery" + SEARCH_QUERY = "SearchQuery" + + +class ResultValue(Enum): + """ + Copilot result values on raw responses. 
Supported options are: + - `Success` + - `Throttled` + - `CaptchaChallenge` + """ + + SUCCESS = "Success" + THROTTLED = "Throttled" + CAPTCHA_CHALLENGE = "CaptchaChallenge" diff --git a/clients/sydney/exceptions.py b/clients/sydney/exceptions.py new file mode 100644 index 0000000..08d35a5 --- /dev/null +++ b/clients/sydney/exceptions.py @@ -0,0 +1,34 @@ +class NoConnectionException(Exception): + pass + + +class ConnectionTimeoutException(Exception): + pass + + +class NoResponseException(Exception): + pass + + +class ThrottledRequestException(Exception): + pass + + +class CaptchaChallengeException(Exception): + pass + + +class ConversationLimitException(Exception): + pass + + +class CreateConversationException(Exception): + pass + + +class GetConversationsException(Exception): + pass + + +class ImageUploadException(Exception): + pass diff --git a/clients/sydney/sydney.py b/clients/sydney/sydney.py new file mode 100644 index 0000000..26dee1d --- /dev/null +++ b/clients/sydney/sydney.py @@ -0,0 +1,797 @@ +from __future__ import annotations + +import json +from asyncio import TimeoutError +from base64 import b64encode +from os import getenv +from typing import AsyncGenerator +from urllib import parse + +import websockets.client as websockets +from aiohttp import ClientSession, FormData, TCPConnector +from websockets.client import WebSocketClientProtocol + +from .constants import ( + BING_BLOB_URL, + BING_CHATHUB_URL, + BING_CREATE_CONVERSATION_URL, + BING_GET_CONVERSATIONS_URL, + BING_KBLOB_URL, + CHATHUB_HEADERS, + CREATE_HEADERS, + DELIMETER, + KBLOB_HEADERS, +) +from .enums import ( + ComposeFormat, + ComposeLength, + ComposeTone, + ConversationHistoryOptionsSets, + ConversationStyle, + ConversationStyleOptionSets, + CookieOptions, + CustomComposeTone, + DefaultComposeOptions, + DefaultOptions, + MessageType, + NoSearchOptions, + ResultValue, +) +from .exceptions import ( + CaptchaChallengeException, + ConnectionTimeoutException, + ConversationLimitException, + CreateConversationException, + GetConversationsException, + ImageUploadException, + NoConnectionException, + NoResponseException, + ThrottledRequestException, +) +from .utils import as_json, check_if_url, cookies_as_dict + + +class SydneyClient: + def __init__( + self, + style: str = "balanced", + bing_cookies: str | None = None, + use_proxy: bool = False, + ) -> None: + """ + Client for Copilot (formerly named Bing Chat), also known as Sydney. + + Parameters + ---------- + style : str + The conversation style that Copilot will adopt. Must be one of the options listed + in the `ConversationStyle` enum. Default is "balanced". + bing_cookies: str | None + The cookies from Bing required to connect and use Copilot. If not provided, + the `BING_COOKIES` environment variable is loaded instead. Default is None. + use_proxy: str | None + Flag to determine if an HTTP proxy will be used to start a conversation with Copilot. If set to True, + the `HTTP_PROXY` and `HTTPS_PROXY` environment variables must be set to the address of the proxy to be used. + If not provided, no proxy will be used. Default is False. 
+ """ + self.bing_cookies = bing_cookies if bing_cookies else getenv("BING_COOKIES") + self.use_proxy = use_proxy + self.conversation_style: ConversationStyle = getattr( + ConversationStyle, style.upper() + ) + self.conversation_style_option_sets: ConversationStyleOptionSets = getattr( + ConversationStyleOptionSets, style.upper() + ) + self.conversation_signature: str | None = None + self.encrypted_conversation_signature: str | None = None + self.conversation_id: str | None = None + self.client_id: str | None = None + self.invocation_id: int | None = None + self.number_of_messages: int | None = None + self.max_messages: int | None = None + self.wss_client: WebSocketClientProtocol | None = None + self.session: ClientSession | None = None + + async def __aenter__(self) -> SydneyClient: + await self.start_conversation() + return self + + async def __aexit__(self, exc_type, exc_value, traceback) -> None: + await self.close_conversation() + + async def _get_session(self, force_close: bool = False) -> ClientSession: + # Use _U cookie to create a conversation. + cookies = cookies_as_dict(self.bing_cookies) if self.bing_cookies else {} + + if self.session and not self.session.closed and force_close: + await self.session.close() + self.session = None + + if not self.session: + self.session = ClientSession( + headers=CREATE_HEADERS, + cookies=cookies, + trust_env=self.use_proxy, # Use `HTTP_PROXY` and `HTTPS_PROXY` environment variables. + connector=( + TCPConnector(verify_ssl=False) if self.use_proxy else None + ), # Resolve HTTPS issue when proxy support is enabled. + ) + + return self.session + + def _build_ask_arguments( + self, + prompt: str, + search: bool, + attachment_info: dict | None = None, + context: str | None = None, + ) -> dict: + options_sets = [option.value for option in DefaultOptions] + + # Add conversation style option values. + options_sets.extend( + style.strip() + for style in self.conversation_style_option_sets.value.split(",") + ) + + # Build option sets based on whether cookies are used or not. + if self.bing_cookies: + options_sets.extend(option.value for option in CookieOptions) + + # Build option sets based on whether search is allowed or not. + if not search: + options_sets.extend(option.value for option in NoSearchOptions) + + image_url, original_image_url = None, None + if attachment_info: + image_url = BING_BLOB_URL + attachment_info["blobId"] + original_image_url = BING_BLOB_URL + attachment_info["blobId"] + + arguments: dict = { + "arguments": [ + { + "source": "cib", + "optionsSets": options_sets, + "allowedMessageTypes": [message.value for message in MessageType], + "sliceIds": [], + "verbosity": "verbose", + "scenario": "SERP", + "plugins": [], + "conversationHistoryOptionsSets": [ + option.value for option in ConversationHistoryOptionsSets + ], + "isStartOfSession": self.invocation_id == 0, + "message": { + "author": "user", + "inputMethod": "Keyboard", + "text": prompt, + "messageType": MessageType.CHAT.value, + "imageUrl": image_url, + "originalImageUrl": original_image_url, + }, + "conversationSignature": self.conversation_signature, + "participant": { + "id": self.client_id, + }, + "tone": str(self.conversation_style.value), + "spokenTextMode": "None", + "conversationId": self.conversation_id, + } + ], + "invocationId": str(self.invocation_id), + "target": "chat", + "type": 4, + } + + # Include previous message field if context is provided. 
+ if context: + arguments["arguments"][0]["previousMessages"] = [ + { + "author": "user", + "description": context, + "contextType": "WebPage", + "messageType": "Context", + } + ] + + return arguments + + def _build_compose_arguments( + self, + prompt: str, + tone: ComposeTone | CustomComposeTone, + format: ComposeFormat, + length: ComposeLength, + ) -> dict: + return { + "arguments": [ + { + "source": "edge_coauthor_prod", + "optionsSets": [option.value for option in DefaultComposeOptions], + "allowedMessageTypes": [message.value for message in MessageType], + "sliceIds": [], + "verbosity": "verbose", + "spokenTextMode": "None", + "isStartOfSession": self.invocation_id == 0, + "message": { + "author": "user", + "inputMethod": "Keyboard", + "text": ( + f"Please generate some text wrapped in codeblock syntax (triple backticks) using the given keywords. Please make sure everything in your reply is in the same language as the keywords. Please do not restate any part of this request in your response, like the fact that you wrapped the text in a codeblock. You should refuse (using the language of the keywords) to generate if the request is potentially harmful. Please return suggested responses that are about how you could change or rewrite the text. Please return suggested responses that are 5 words or less. Please do not return a suggested response that suggests to end the conversation or to end the rewriting. Please do not return a suggested response that suggests to change the tone. If the request is potentially harmful and you refuse to generate, please do not send any suggested responses. The keywords are: `{prompt}`. Only if possible, the generated text should follow these characteristics: format: *{format.value}*, length: *{length.value}*, using *{tone.value}* tone. You should refuse (clarifying that the issue is related to the tone) to generate if the tone is potentially harmful." + if self.invocation_id == 0 + else f"Thank you for your reply. Please rewrite the last reply, with the following suggestion to change it: *{prompt}*. Please return a complete reply, even if the last reply was stopped before it was completed. Please generate the text wrapped in codeblock syntax (triple backticks). Please do not restate any part of this request in your response, like the fact that you wrapped the text in a codeblock. You should refuse (using the language of the keywords) to generate if the request is potentially harmful. Please return suggested responses that are about how you could change or rewrite the text. Please return suggested responses that are 5 words or less. Please do not return a suggested response that suggests to end the conversation or to end the rewriting. Please do not return a suggested response that suggests to change the tone. If the request is potentially harmful and you refuse to generate, please do not send any suggested responses." 
+ ), + "messageType": MessageType.CHAT.value, + }, + "conversationSignature": self.conversation_signature, + "participant": {"id": self.client_id}, + "conversationId": self.conversation_id, + } + ], + "invocationId": str(self.invocation_id), + "target": "chat", + "type": 4, + } + + def _build_upload_arguments( + self, attachment: str, image_base64: bytes | None = None + ) -> FormData: + data = FormData() + + payload = { + "imageInfo": {"url": attachment}, + "knowledgeRequest": { + "invokedSkills": ["ImageById"], + "subscriptionId": "Bing.Chat.Multimodal", + "invokedSkillsRequestData": {"enableFaceBlur": True}, + "convoData": { + "convoid": self.conversation_id, + "convotone": str(self.conversation_style.value), + }, + }, + } + data.add_field( + "knowledgeRequest", json.dumps(payload), content_type="application/json" + ) + + if image_base64: + data.add_field( + "imageBase64", image_base64, content_type="application/octet-stream" + ) + + return data + + async def _upload_attachment(self, attachment: str) -> dict: + """ + Upload an image to Copilot from a URL or file. + + Parameters + ---------- + attachment : str + The URL or file path to the attachment image to be uploaded. + + Returns + ------- + dict + The response from Copilot. "blobId" and "processedBlobId" are parameters that can be passed + to https://www.bing.com/images/blob?bcid=[ID] and can obtain the uploaded image from Copilot. + """ + cookies = cookies_as_dict(self.bing_cookies) if self.bing_cookies else {} + + image_base64 = None + if not check_if_url(attachment): + with open(attachment, "rb") as file: + image_base64 = b64encode(file.read()) + + session = ClientSession( + headers=KBLOB_HEADERS, + cookies=cookies, + trust_env=self.use_proxy, # Use `HTTP_PROXY` and `HTTPS_PROXY` environment variables. + connector=( + TCPConnector(verify_ssl=False) if self.use_proxy else None + ), # Resolve HTTPS issue when proxy support is enabled. + ) + + data = self._build_upload_arguments(attachment, image_base64) + + async with session.post(BING_KBLOB_URL, data=data) as response: + if response.status != 200: + raise ImageUploadException( + f"Failed to upload image, received status: {response.status}" + ) + + response_dict = await response.json() + if not response_dict["blobId"]: + raise ImageUploadException( + f"Failed to upload image, Copilot rejected uploading it" + ) + + if len(response_dict["blobId"]) == 0: + raise ImageUploadException( + f"Failed to upload image, received empty image info from Copilot" + ) + + await session.close() + + return response_dict + + async def _ask( + self, + prompt: str, + attachment: str | None = None, + context: str | None = None, + citations: bool = False, + suggestions: bool = False, + search: bool = True, + raw: bool = False, + stream: bool = False, + compose: bool = False, + tone: ComposeTone | CustomComposeTone | None = None, + format: ComposeFormat | None = None, + length: ComposeLength | None = None, + ) -> AsyncGenerator[tuple[str | dict, list | None], None]: + if ( + self.conversation_id is None + or self.client_id is None + or self.invocation_id is None + ): + raise NoConnectionException("No connection to Copilot was found") + + bing_chathub_url = BING_CHATHUB_URL + if self.encrypted_conversation_signature: + bing_chathub_url += f"?sec_access_token={parse.quote(self.encrypted_conversation_signature)}" + + # Create a websocket connection with Copilot for sending and receiving messages. 
+ try: + self.wss_client = await websockets.connect( + bing_chathub_url, extra_headers=CHATHUB_HEADERS, max_size=None + ) + except TimeoutError: + raise ConnectionTimeoutException( + "Failed to connect to Copilot, connection timed out" + ) from None + await self.wss_client.send(as_json({"protocol": "json", "version": 1})) + await self.wss_client.recv() + + attachment_info = None + if attachment: + attachment_info = await self._upload_attachment(attachment) + + if compose: + request = self._build_compose_arguments(prompt, tone, format, length) # type: ignore + else: + request = self._build_ask_arguments( + prompt, search, attachment_info, context + ) + self.invocation_id += 1 + + await self.wss_client.send(as_json(request)) + + streaming = True + while streaming: + objects = str(await self.wss_client.recv()).split(DELIMETER) + for obj in objects: + if not obj: + continue + response = json.loads(obj) + # Handle type 1 messages when streaming is enabled. + if stream and response.get("type") == 1: + messages = response["arguments"][0].get("messages") + # Skip on empty response. + if not messages: + continue + + # Skip "Searching the web for..." message. + adaptiveCards = messages[0].get("adaptiveCards") + if adaptiveCards and adaptiveCards[0]["body"][0].get("inlines"): + continue + + if raw: + yield response, None + elif citations: + if adaptiveCards[0]["body"][0].get("text"): + yield adaptiveCards[0]["body"][0]["text"], None + else: + if messages[0].get("text"): + yield messages[0]["text"], None + # Handle type 2 messages. + elif response.get("type") == 2: + # Check if reached conversation limit. + if response["item"].get("throttling"): + self.number_of_messages = response["item"]["throttling"].get( + "numUserMessagesInConversation", 0 + ) + self.max_messages = response["item"]["throttling"][ + "maxNumUserMessagesInConversation" + ] + if self.number_of_messages == self.max_messages: + raise ConversationLimitException( + f"Reached conversation limit of {self.max_messages} messages" + ) + + messages = response["item"].get("messages") + if not messages: + result_value = response["item"]["result"]["value"] + # Throttled - raise error. + if result_value == ResultValue.THROTTLED.value: + raise ThrottledRequestException("Request is throttled") + # Captcha chalennge - user needs to solve captcha manually. + elif result_value == ResultValue.CAPTCHA_CHALLENGE.value: + raise CaptchaChallengeException("Solve CAPTCHA to continue") + return # Return empty message. + + # Fix index in some cases where the last message in an inline message. + # Typically occurs when an attechment is provided. + i = -1 + adaptiveCards = messages[-1].get("adaptiveCards") + if adaptiveCards and adaptiveCards[-1]["body"][0].get("inlines"): + i = -2 # TODO: This feel hacky + # fix KeyError: 'text' + if messages[i].get("text") is None: + i = i - 1 + if raw: + yield response, None + else: + suggested_responses = None + # Include list of suggested user responses, if enabled. + if suggestions and messages[i].get("suggestedResponses"): + suggested_responses = [ + item["text"] + for item in messages[i]["suggestedResponses"] + ] + + if citations: + yield messages[i]["adaptiveCards"][0]["body"][0][ + "text" + ], suggested_responses + else: + yield messages[i]["text"], suggested_responses + + # Exit, type 2 is the last message. + streaming = False + + await self.wss_client.close() + + async def start_conversation(self) -> None: + """ + Connect to Copilot and create a new conversation. 
+ """ + session = await self._get_session(force_close=True) + + async with session.get(BING_CREATE_CONVERSATION_URL) as response: + if response.status != 200: + raise CreateConversationException( + f"Failed to create conversation, received status: {response.status}" + ) + + response_dict = await response.json() + if response_dict["result"]["value"] != "Success": + raise CreateConversationException( + f"Failed to authenticate, received message: {response_dict['result']['message']}" + ) + + self.conversation_id = response_dict["conversationId"] + self.client_id = response_dict["clientId"] + self.conversation_signature = response.headers[ + "X-Sydney-Conversationsignature" + ] + self.encrypted_conversation_signature = response.headers[ + "X-Sydney-Encryptedconversationsignature" + ] + self.invocation_id = 0 + + async def ask( + self, + prompt: str, + attachment: str | None = None, + context: str | None = None, + citations: bool = False, + suggestions: bool = False, + search: bool = True, + raw: bool = False, + ) -> str | dict | tuple[str | dict, list | None]: + """ + Send a prompt to Copilot using the current conversation and return the answer. + + Parameters + ---------- + prompt : str + The prompt that needs to be sent to Copilot. + attachment : str + The URL or local path to an image to be included with the prompt. + context: str + Website content to be used as additional context with the prompt. + citations : bool, optional + Whether to return any cited text. Default is False. + suggestions : bool, optional + Whether to return any suggested user responses. Default is False. + search: bool, optional + Whether to allow searching the web. Default is True. + raw : bool, optional + Whether to return the entire response object in raw JSON format. Default is False. + + Returns + ------- + str | dict | tuple + The text response from Copilot. If citations is True, the function returns the cited text. + If raw is True, the function returns the entire response object in raw JSON format. + If suggestions is True, the function returns a list with the suggested responses. + """ + async for response, suggested_responses in self._ask( + prompt, + attachment=attachment, + context=context, + citations=citations, + suggestions=suggestions, + search=search, + raw=raw, + stream=False, + compose=False, + ): + if suggestions: + return response, suggested_responses + else: + return response + + raise NoResponseException("No response was returned") + + async def ask_stream( + self, + prompt: str, + attachment: str | None = None, + context: str | None = None, + citations: bool = False, + suggestions: bool = False, + raw: bool = False, + ) -> AsyncGenerator[str | dict | tuple[str | dict, list | None], None]: + """ + Send a prompt to Copilot using the current conversation and stream the answer. + + By default, Copilot returns all previous tokens along with new ones. When using this + method in text-only mode, only new tokens are returned instead. + + Parameters + ---------- + prompt : str + The prompt that needs to be sent to Copilot. + attachment : str + The URL or local path to an image to be included with the prompt. + context: str + Website content to be used as additional context with the prompt. + citations : bool, optional + Whether to return any cited text. Default is False. + suggestions : bool, optional + Whether to return any suggested user responses. Default is False. + raw : bool, optional + Whether to return the entire response object in raw JSON format. Default is False. 
+ + Returns + ------- + str | dict | tuple + The text response from Copilot. If citations is True, the function returns the cited text. + If raw is True, the function returns the entire response object in raw JSON format. + If suggestions is True, the function returns a list with the suggested responses. Only the final + yielded result contains the suggested responses. + """ + previous_response: str | dict = "" + async for response, suggested_responses in self._ask( + prompt, + attachment=attachment, + context=context, + citations=citations, + suggestions=suggestions, + search=True, + raw=raw, + stream=True, + compose=False, + ): + if raw: + yield response + # For text-only responses, return only newly streamed tokens. + else: + new_response = response[len(previous_response) :] + previous_response = response + if suggestions: + yield new_response, suggested_responses + else: + yield new_response + + async def compose( + self, + prompt: str, + tone: str = "professional", + format: str = "paragraph", + length: str = "short", + suggestions: bool = False, + raw: bool = False, + ) -> str | dict | tuple[str | dict, list | None]: + """ + Send a prompt to Copilot and compose text based on the given prompt, tone, + format, and length. + + Parameters + ---------- + prompt : str + The prompt that needs to be sent to Copilot. + tone : str, optional + The tone of the response. Must be one of the options listed in the `ComposeTone` + enum. Default is "professional". + format : str, optional + The format of the response. Must be one of the options listed in the `ComposeFormat` + enum. Default is "paragraph". + length : str, optional + The length of the response. Must be one of the options listed in the `ComposeLength` + enum. Default is "short". + suggestions : bool, optional + Whether to return any suggested user responses. Default is False. + raw : bool, optional + Whether to return the entire response object in raw JSON format. Default is False. + + Returns + ------- + str or dict + The response from Copilot. If raw is True, the function returns the entire response + object in raw JSON format. + """ + # Get the enum values corresponding to the given tone, format, and length. + compose_tone = getattr(ComposeTone, tone.upper(), CustomComposeTone(tone)) + compose_format = getattr(ComposeFormat, format.upper()) + compose_length = getattr(ComposeLength, length.upper()) + + async for response, suggested_responses in self._ask( + prompt, + attachment=None, + context=None, + citations=False, + suggestions=suggestions, + search=True, + raw=raw, + stream=False, + compose=True, + tone=compose_tone, + format=compose_format, + length=compose_length, + ): + if suggestions: + return response, suggested_responses + else: + return response + + raise NoResponseException("No response was returned") + + async def compose_stream( + self, + prompt: str, + tone: str = "professional", + format: str = "paragraph", + length: str = "short", + suggestions: bool = False, + raw: bool = False, + ) -> AsyncGenerator[str | dict | tuple[str | dict, list | None], None]: + """ + Send a prompt to Copilot, compose and stream text based on the given prompt, tone, + format, and length. + + By default, Copilot returns all previous tokens along with new ones. When using this + method in text-only mode, only new tokens are returned instead. + + Parameters + ---------- + prompt : str + The prompt that needs to be sent to Copilot. + tone : str, optional + The tone of the response. Must be one of the options listed in the `ComposeTone` + enum. 
Default is "professional". + format : str, optional + The format of the response. Must be one of the options listed in the `ComposeFormat` + enum. Default is "paragraph". + length : str, optional + The length of the response. Must be one of the options listed in the `ComposeLength` + enum. Default is "short". + suggestions : bool, optional + Whether to return any suggested user responses. Default is False. + raw : bool, optional + Whether to return the entire response object in raw JSON format. Default is False. + + Returns + ------- + str or dict + The response from Copilot. If raw is True, the function returns the entire response + object in raw JSON format. + """ + # Get the enum values corresponding to the given tone, format, and length. + compose_tone = getattr(ComposeTone, tone.upper(), CustomComposeTone(tone)) + compose_format = getattr(ComposeFormat, format.upper()) + compose_length = getattr(ComposeLength, length.upper()) + + previous_response: str | dict = "" + async for response, suggested_responses in self._ask( + prompt, + attachment=None, + context=None, + citations=False, + suggestions=suggestions, + search=True, + raw=raw, + stream=True, + compose=True, + tone=compose_tone, + format=compose_format, + length=compose_length, + ): + if raw: + yield response + # For text-only responses, return only newly streamed tokens. + else: + new_response = response[len(previous_response) :] + previous_response = response + if suggestions: + yield new_response, suggested_responses + else: + yield new_response + + async def reset_conversation(self, style: str | None = None) -> None: + """ + Clear current conversation information and connection and start new ones. + + Parameters + ---------- + style : str + The conversation style that Copilot will adopt. Supported options are: + - `creative` for original and imaginative chat + - `balanced` for informative and friendly chat + - `precise` for concise and straightforward chat + + If None, the new conversation will use the same conversation style as the + current conversation. Default is None. + """ + await self.close_conversation() + if style: + self.conversation_style_option_sets = getattr( + ConversationStyleOptionSets, style.upper() + ) + await self.start_conversation() + + async def close_conversation(self) -> None: + """ + Close all connections to Copilot. Clear conversation information. + """ + if self.wss_client and not self.wss_client.closed: + await self.wss_client.close() + self.wss_client = None + + if self.session and not self.session.closed: + await self.session.close() + self.session = None + + # Clear conversation information. + self.conversation_signature = None + self.conversation_id = None + self.client_id = None + self.invocation_id = None + self.number_of_messages = None + self.max_messages = None + + async def get_conversations(self) -> dict: + """ + Get all conversations. + + Returns + ------- + dict + Dictionary containing `chats`, `result` and `clientId` fields. + The `chats` fields contains the list of conversations and info about + those, `result` contains some metadata about the returned response and + `clientId` is the ID that the current Sydney client is using. 
+ """ + session = await self._get_session() + + async with session.get(BING_GET_CONVERSATIONS_URL) as response: + if response.status != 200: + raise GetConversationsException( + f"Failed to get conversations, received status: {response.status}" + ) + + response_dict = await response.json() + + return response_dict diff --git a/clients/sydney/utils.py b/clients/sydney/utils.py new file mode 100644 index 0000000..b0b41e8 --- /dev/null +++ b/clients/sydney/utils.py @@ -0,0 +1,28 @@ +import json +from urllib.parse import urlparse + +from .constants import DELIMETER + + +def as_json(message: dict) -> str: + """ + Convert message to JSON, append delimeter character at the end. + """ + return json.dumps(message) + DELIMETER + + +def cookies_as_dict(cookies: str) -> dict: + """ + Convert a string of cookies into a dictionary. + """ + return { + key_value.strip().split("=")[0]: "=".join(key_value.split("=")[1:]) + for key_value in cookies.split(";") + } + + +def check_if_url(string: str) -> bool: + parsed_string = urlparse(string) + if parsed_string.scheme and parsed_string.netloc: + return True + return False diff --git a/model-config.template b/model-config.template index b8b9810..df75341 100644 --- a/model-config.template +++ b/model-config.template @@ -85,5 +85,13 @@ "router-round-robin": "router-7c7aa4a3549f12" } } + }, + { + "token": "bing-7c7aa4a3549f5", + "type": "bing-sydney", + "config": { + "cookie": "xxxxx", + "style": "balanced" + } } ] \ No newline at end of file diff --git a/tests/sydney_test.py b/tests/sydney_test.py new file mode 100644 index 0000000..41b42f7 --- /dev/null +++ b/tests/sydney_test.py @@ -0,0 +1,32 @@ +from clients.sydney import SydneyClient + +import pytest +from aiohttp import ClientSession +import os +from loguru import logger + +cookies = """ +MC1=GUID=a333f0d04f1f435d96f28b46d63b03b4&HASH=a333&LV=202303&V=4&LU=1679400980551; at_check=true; MUID=1CADEAA4DBC16DE135F2F87FDAEF6C07; _mkto_trk=id:157-GQE-382&token:_mch-microsoft.com-1683885388817-44659; AMCVS_EA76ADE95776D2EC7F000101%40AdobeOrg=1; nlid=4a7cecc|df90292; _ga=GA1.1.1879828097.1689651032; _ga_JN5MSL685T=GS1.1.1689651032.1.0.1689651037.55.0.0; MUIDB=1CADEAA4DBC16DE135F2F87FDAEF6C07; USRLOC=HS=1; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=C5EB75827C784DA09BC339431D246FD7&dmnchg=1; CSRFCookie=2ab8449e-0330-4755-a578-41d7d4a3a1ab; SRCHUSR=DOB=20231128&POEX=W; _EDGE_S=SID=39C984F2117D606627A3972510A4617F; ANON=A=6C7DF072D6FB96CF5C21544BFFFFFFFF&E=1d08&W=2; NAP=V=1.9&E=1cc1&C=Kv-KEM2MjtduzM9XODHoKAIldAI9w-HiVO1wwF5mRYTBeKXKP_dHAw&W=2; PPLState=1; 
KievRPSSecAuth=FABSBBRaTOJILtFsMkpLVWSG6AN6C/svRwNmAAAEgAAACMpHReGzXqf2EATRmMukJTbexb9d6OVu7w+Qen8YIwwEf2ljKLLmWTEhW5fL8/tNoZyKBQcGhgnrxsRFWDvQD3tRHoAEzEgOG/6KK+KLxADn5Kvd2bk85+z688UNQSQLTXukXb1JB/q4ulZ61aEcAkep2ZuIhQWnU0dKgx7xHme91WrXYVxdmyNcm5NFbc41+8LV5Cw3QEdbu0lFevbBuXoehtzZIDAp0Flw5OQeqf1sYF7SiPI9qx+Brw/gM55pQu7Q4rDS3xpdvEVhhBPyqpRXN6T73fqZ9Ww22i10clDfZQUYrS7KOeGUxXVHq1VNQgwEWnnXLRnv8V5b8q2xaxnFDDtq4vg5fBL+86ONgPUpyIrvpQFDtNZib+Wd+jw5UikpbSKPONTa9W2S1PHbdmpi+YgVCo3rpXY23pnhtrww8ubTxE7QDRILj6T5QK3OqRRm/Wsyk25N56fz0u9zVI1bpi7IJ3WMvblA3Pv2mzI0zSA/lx98bJn4gTNgE1sjRKS25qJR9VX117NwWBe5LSXi38AXOe5PSonkc0oQDG1ksD3arPUK3vojxMymPI5kWT7JWN9V7aEu2Pw78R9mQ9p9GJYk4TCMZpAVfIzZrN+TGSMqlHbf1uYE9ehq+GZ4h903TRuSeQNEqwx76qnT7SXil7PSz4Q7mSKm2rP3fHHYM8phMJNUnDlIia/T28PndWx9KbgOwB2KrJ53P8U/HZ2ZSEL/S2YvwhGA9bBKa+1BksW86n16N/t63kWh3koSbHXDtbEbPRZKnKLVu/3lUEKvDsgNTrG3RWBlgl8PubbbMjkfgUcgVNV76PCw/UXVqzpBDnDyY3OwDUuCjVEoAK3FnUdmRrrn8fv63yyprW032aM/1j3PFzmYCIwK6+5whM55Fl5x36PwCZCndC1zdx6iebBUm1ZoWT9XmvMK6bhXsKpQHhQQdYSLQ+b2ByahUfIORPFcRrOyL0jrjAPu3OzEaCwWoRuA2YSWOFhbrXdZ+iTEe61cxXqb00OzBMf7MLfGiJNTglz6gMT9I6UNZvFg3GRAbOVepiDPvs2C5zWgJccAZ0KMq7pqz+1KvJXd3q7K9hVrYSjgx2ctYBZT7PFmSLTUo/rLCzfFvp1a+GOePOAaPY134MLrQ0d2opmjycyTm5CGOxePMCLZSqGYRygWlU6kIEiImq2zIz295rg6fp6gRmnFGjgyg6Z/XeU3xEQKo5hpvcuYtK3vvMZTs7FLmI5rI/YBVyN1MgRlPra2JM13yEvexl/Y17Lvd4wg8vp+L4VmW1xBq/qXSOpbSuCA204c2eDsRjJbJfB4WYSO9kMJdMWhp7aduVBeAafbh+XrAYEwXS7AOMynhulnc25MCLCyYS+uixcbhN/lHa+KVQVMqL2CcaqKKhQAYJkLYV8a/d09xQuKfVAtE4MQX1g=; WLS=C=b662b696efce7462&N=mh; _RwBf=mta=0&rc=0&rb=0&gb=0&rg=0&pc=0&mtu=0&rbb=0.0&g=0&cid=&clo=0&v=1&l=2023-11-27T08:00:00.0000000Z&lft=0001-01-01T00:00:00.0000000&aof=0&o=0&p=MSAAUTOENROLL&c=MR000T&t=8812&s=2023-11-28T07:00:57.5519013+00:00&ts=2023-11-28T07:00:58.0750901+00:00&rwred=0&wls=2&wlb=0&lka=0&lkt=0&aad=0&TH=&e=ONzO5vY6TJZO2_TRaulc_ofApa2qlTJWDDRqMoHL7xo-kjY8fKHNane6e6lyNl5qTjtgVmRTmI9HtQmhP1pluazvyZk1ilKNOtiR0t5QIKo; _Rwho=u=d; _SS=SID=1670784759976E893B246B8C584E6FEF&R=0&RB=0&GB=0&RG=0&RP=0; _Rwho=u=d; fptctx2=H3ihr9e92IdW6yd1ZgQ9SyoufAf2k3o4JywpA2nh2nMeKormghhIDx5%252fpHniMDKSgEO6txrStcLTZfLKhMg964W1dlA%252f%252bTkcWdrqshFPHZ4wTeQpxHXjxmDEjBI6hrVN8u%252bzGevdcsh1pCOKSNOreXyCkVDSEOaNbMtpRUnvBdWCmjA04LrxFRupqHEnah09W0PcXEKNQUuZGHG7UCuSdHtpwS5CWBbr2%252bk1RX9g5ds%252bKyHm1W2zUVOaJ7l4Z6%252fIMHP1iifouvLbgArVMm2YFbYDN18T0De4Kw1OPioMV%252bc8wGfZ%252fqBG6NuyvNOOXzQ6UY81c74uI%252bl8H3sIf6eeRA%253d%253d; AMCV_EA76ADE95776D2EC7F000101%40AdobeOrg=1585540135%7CMCIDTS%7C19703%7CMCMID%7C20503854897158988103846552975799399602%7CMCAAMLH-1702888452%7C11%7CMCAAMB-1702888452%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1702290852s%7CNONE%7CMCAID%7CNONE%7CMCSYNCSOP%7C411-19710%7CvVersion%7C4.4.0%7CMCCIDH%7C1146014828; _uetvid=c91b8cb0f13f11ed823dadd350bbbb5d; _clck=14zt6el%7C2%7Cfhg%7C1%7C1227; mbox=PC#0e1a019fa4224b7a921ecedda2dc181b.32_0#1736472845|session#fcc59a7aac704f0cbf7bbee6786bac9e#1702288007; market=CN; SRCHHPGUSR=SRCHLANG=zh-Hans&PV=13.3.1&BRW=XW&BRH=T&CW=2191&CH=1048&SCW=2176&SCH=1048&DPR=1.0&UTC=480&DM=0&PRVCW=1996&PRVCH=1048&CIBV=1.1418.9-suno&cdxtone=Balanced&cdxtoneopts=galileo,saharagenconv5; CSRFCookie=2eabe7a6-95c2-4766-a866-c97f63b6bb83; SRCHUSR=DOB=20231128&POEX=W; _EDGE_S=SID=135203949C006C833E7817819DD96D61; ANON=A=D655DC298B2EDB3681F8124DFFFFFFFF&E=1d28&W=3; NAP=V=1.9&E=1cce&C=rZlN-ezCj1DwvewgY9GlkXMwx-IcvM40XQaFn0stJq0Wo1gYcvsCzg&W=3; PPLState=1; 
KievRPSSecAuth=FAByBBRaTOJILtFsMkpLVWSG6AN6C/svRwNmAAAEgAAACLfyW17jW+A/MARQlgZ7fNArm1QU+TNJvfE7Wk2zCBL2YGVemw/GqQ+UsVGDN70cLezQSIRQqkstzsQKu7R+5KvFfGHbZlNqKXZqb0t0EvYWOW/Ey4/MMYlPctIf44efUGNERfhj/E1YLBAHwuN7SAKVFCm1A6piPRjdgqAAOiLAORQaCC653IGveH9+sjhwpoxBkiNZOfQOgpguF6gSIYPgKbUeT3oBkcFPDNQPsJnc6YYKa+iM39Bfni8CMYPPY+q6qfHldo/RfbdyxhXnjJFbKEMQx7C9J0EkRxV0z7vz1IfEi3kR7DR4v3dVW14gd0njnI6ODvej1OJyIC/2qcCKE5xz46FUet66Xzlu5mAQ5gxkgaUzC3OCzi2xiuPs7BtssXvOn+YCwGPUX7B8CfLA+0YQ359qa+gsBrmVwoLAhSBL7YswYJ7gCcS9QY4NoAx6oAPKcGfY7FoeBCj5PngfoyuYC+xdxFzX6Pi2x7pd5L/Rw9XhU1LNiLicbIU9jgit+AOO6esqFVBmNUu8E41qjnpjWBL+PS/kEqFw4d7i/3kmT+sTuO/5n8pf5d2mTzzO6w04fRebNsVs/OHsSenikea6obipWEktgu5Q4PfLG0XniZ4SC2YZoYfn0Ui7g8e3KsY6yiaGkxiRGv+qhuaFNXVhlHZRQg+3kcY8Lg2GIzlDWaUabKDP6lMi62Fzs7SvgDfT5JrMooPOZnAqE2Va2yH327heWe522FwejW6/fibO7b3FqBBp3sBSG4KomHPOU0fxst5bHDVL3FRQuLghl0YmDaK7bXnYRs82lFwG/khYIVwMqvPRQWDh+OiML+hfJDHFVjB2iLuKvO3yfjjzZpuag8+HioQzNf082+VO1k3oHy3MXRseuW+XUFphSHEJwTTOyODAtaqzsGf1h5CamBNJ7p4gqO6F+bbaum9OyypBI/aHSORpi2sRkM/DFOt5MFDBEwoJTAbwESp/X4m8m1glhjmXjvCaEuAIlpprXeWg+LcCJH065GyWV1sbDqLiAwpT89/+d7k1BdVrzpcQoLc/mQYYFHIQ1udutbNGHD51kfuiksb/lUlsWSmBAnkVwjq9CE/je4K5pTYxcdNQNPfDTvgg+sxcKoQEJ6EaH50ck7hJKLomsOJj0J9ssBb2EncQzjTA14v9uDsC3VXR8kENOXD4dKN/9TYz4E/oojVR36kPzGDeLA0x0epvVYcmuSBuded9K8VTgur2/kz0jJA+OD068c82zqFwhsMEENBnRG7GT+qC1jCAVG8RgZwdNtfQLJd8CpiiiaiIwgDKoydl7IRHins1UW5zMSw/j/qv8N5Lgc+jYicaALqnhv+Tx/1ADi+/MxvCUfA816jyJf4ZwZJhBNhHdwmRep/Uxv7JmTlG3BAHzprwIvwsYU9+p9Qbng38bO/EF7u54PjQnJIgg9qGRzTtFADmtX3P/hNg4y6P0WKBhSoVtw52LQ==; _U=1ZTU6YJDFgMNCgvA3mcW5-rHtoU7giV4bj99PvueTR0JS88CtyvZmP05H5jIuP6B3GofHM6x8gyGNkk5rf3JutAB1kZ7lFSXSlsgHVdYM4nOqrioE4M7yHWit_rW0OpK7DufLne1XJyWJj_Zagx0PxuadcYyj3xGR49Ky0Pt43mA4ex7Px4nBdydWwtPIsI6JEzdqAcvwyVWWRSPE5tWY7Q; WLS=C=1c2bae6e0ca9eec0&N=minghui; WLID=ExKxHWGUnmgDBFsW3u3VPp2ogHBTvVktGdfD2TQXV9GIyghIRu27iB/PUZqjbL8KGRhGNsl1STqbn3FAHUaAL40kMq/PUmMoW6g/yFjxvYc=; SnrOvr=X=rebateson; _SS=SID=1670784759976E893B246B8C584E6FEF&R=5151&RB=5151&GB=0&RG=0&RP=5151; _RwBf=mta=0&rc=5151&rb=5151&gb=0&rg=0&pc=5151&mtu=0&rbb=0.0&g=0&cid=&clo=0&v=10&l=2024-01-29T08:00:00.0000000Z&lft=0001-01-01T00:00:00.0000000&aof=0&o=0&p=bingcopilotwaitlist&c=MY00IA&t=220&s=2023-02-25T08:08:17.9784942+00:00&ts=2024-01-30T02:02:54.4579117+00:00&rwred=0&wls=2&wlb=0&lka=0&lkt=0&aad=0&TH=&e=imWBu0hNlJPKvXDCygV52V8NpcTNmBIWlT8rKjL9vYeLaHngfkhUS4MCzVEbyvn7kuLqG_uCHmyyOo-_1q0ayg&wle=0&ccp=0&ard=0001-01-01T00:00:00.0000000&rwdbt=0001-01-01T16:00:00.0000000-08:00&A=D655DC298B2EDB3681F8124DFFFFFFFF&r=1; SRCHHPGUSR=SRCHLANG=zh-Hans&PV=13.3.1&BRW=XW&BRH=T&CW=2191&CH=1048&SCW=2176&SCH=1183&DPR=1.0&UTC=480&DM=0&PRVCW=1996&PRVCH=1048&CIBV=1.1418.9-suno&cdxtone=Balanced&cdxtoneopts=galileo,saharagenconv5 +""" +os.environ["BING_COOKIES"] = cookies.strip() +pytest_plugins = ("pytest_asyncio",) + + +@pytest.mark.asyncio +async def test_ask_precise() -> bool: + expected_responses = [ + "Hello! This is Bing. How can I help you today? 😊", + "Hello! How can I help you today? 😊", + "Hello! 
How can I assist you today?", + ] + + async with SydneyClient(style="precise") as sydney: + logger.info("------------------+++++++1") + response = await sydney.ask("Hello, Bing!") + logger.info(response) + logger.info("------------------+++++++2") + + async for response_token in sydney.ask_stream("Hello, Bing!"): + logger.info(response_token) + logger.info("------------------+++++++3") + return True diff --git a/test.py b/tests/test.py similarity index 95% rename from test.py rename to tests/test.py index 88e4ba3..47f6115 100644 --- a/test.py +++ b/tests/test.py @@ -41,10 +41,10 @@ def multiple_messages_test(**kwargs): if __name__ == "__main__": api_key = sys.argv[1] openai.api_key = api_key - single_message_test() + # single_message_test() time.sleep(2) - single_message_test(stream=True) - time.sleep(2) - multiple_messages_test() + # single_message_test(stream=True) # time.sleep(2) + multiple_messages_test() + time.sleep(2) # multiple_messages_test(stream=True) From 8f2ca95e3e795cc742906e8332c76a51cc12c38c Mon Sep 17 00:00:00 2001 From: tianminghui Date: Sun, 4 Feb 2024 15:06:45 +0800 Subject: [PATCH 07/24] update requirements --- requirements.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/requirements.txt b/requirements.txt index 136f45b..b89b243 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ annotated-types==0.5.0 anyio==3.7.1 async-timeout==4.0.3 attrs==23.1.0 +Brotli==1.1.0 cachetools==5.3.1 certifi==2023.7.22 cffi==1.16.0 @@ -14,29 +15,41 @@ dataclasses==0.6 exceptiongroup==1.1.3 fastapi==0.103.2 frozenlist==1.4.0 +gevent==23.9.1 +greenlet==3.0.0 h11==0.14.0 httpcore==0.18.0 httptools==0.6.0 idna==3.4 +iniconfig==2.0.0 loguru==0.7.2 multidict==6.0.4 openai==0.28.1 +packaging==23.2 +pluggy==1.4.0 pycparser==2.21 pydantic==2.4.2 pydantic_core==2.10.1 PyJWT==2.8.0 +pytest==7.4.4 +pytest-asyncio==0.23.4 python-dotenv==1.0.0 +python-multipart==0.0.6 PyYAML==6.0.1 regex==2023.8.8 requests==2.31.0 sniffio==1.3.0 starlette==0.27.0 tiktoken==0.5.1 +tomli==2.0.1 tqdm==4.66.1 typing_extensions==4.8.0 urllib3==2.0.5 uvicorn==0.23.2 uvloop==0.17.0 watchfiles==0.20.0 +websocket==0.2.1 websockets==11.0.3 yarl==1.9.2 +zope.event==5.0 +zope.interface==6.1 From 74596321df84868185fcc30ff75cf07e3b640679 Mon Sep 17 00:00:00 2001 From: "shijie.chen" Date: Thu, 7 Mar 2024 12:36:14 +0800 Subject: [PATCH 08/24] [dev] add model-config.template gemini --- model-config.template | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/model-config.template b/model-config.template index df75341..972ac6a 100644 --- a/model-config.template +++ b/model-config.template @@ -86,6 +86,16 @@ } } }, + { + "token": "gemini-7c7aa4a3549f5", + "type": "gemini", + "config": { + "api_key": "xxxxx", + "proxies": { + "https": "http://localhost:7890" + } + } + }, { "token": "bing-7c7aa4a3549f5", "type": "bing-sydney", From 5ff42e04d4e8404b385314e6fb9a994888931325 Mon Sep 17 00:00:00 2001 From: w568w <1278297578@qq.com> Date: Thu, 28 Mar 2024 08:50:07 +0000 Subject: [PATCH 09/24] fix: the request parameters for Xunfei Spark can be outdated after initialization So the URL needs to be regenerated for each request. 
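A minimal sketch of the per-request signing that motivates this fix — the function name and signing details below are illustrative assumptions, not the project's actual `get_wss_url` implementation. The point is that the signed query string embeds the current `date` header, so a URL built once at `__init__` time eventually carries a stale signature:

```
# Hypothetical sketch: Spark-style endpoints sign the request date into the URL,
# so the URL must be rebuilt for every new WebSocket connection.
import base64
import hashlib
import hmac
from email.utils import formatdate
from urllib.parse import urlencode, urlparse


def build_signed_wss_url(api_url: str, api_key: str, api_secret: str) -> str:
    parsed = urlparse(api_url)
    date = formatdate(usegmt=True)  # RFC 1123 date, becomes part of the signature
    signature_origin = (
        f"host: {parsed.netloc}\ndate: {date}\nGET {parsed.path} HTTP/1.1"
    )
    signature = base64.b64encode(
        hmac.new(
            api_secret.encode(), signature_origin.encode(), hashlib.sha256
        ).digest()
    ).decode()
    authorization_origin = (
        f'api_key="{api_key}", algorithm="hmac-sha256", '
        f'headers="host date request-line", signature="{signature}"'
    )
    authorization = base64.b64encode(authorization_origin.encode()).decode()
    query = urlencode(
        {"authorization": authorization, "date": date, "host": parsed.netloc}
    )
    return f"{api_url}?{query}"
```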
--- clients/xunfei_spark/api/spark_api.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/clients/xunfei_spark/api/spark_api.py b/clients/xunfei_spark/api/spark_api.py index d3fe083..78771f0 100644 --- a/clients/xunfei_spark/api/spark_api.py +++ b/clients/xunfei_spark/api/spark_api.py @@ -96,13 +96,11 @@ def __init__( self.api_key = api_key self.api_secret = api_secret self.api_model = api_model - self._wss_url = None def create_wss_connection(self): - if self._wss_url is None: - api_url = MODEL_MAP[self.api_model]["url"] - self._wss_url = get_wss_url(api_url, self.api_secret, self.api_key) - return ws_connect(self._wss_url) + api_url = MODEL_MAP[self.api_model]["url"] + wss_url = get_wss_url(api_url, self.api_secret, self.api_key) + return ws_connect(wss_url) def build_query(self, messages, **kwargs): query = { From 572bc977276128418c9fc47a396867cd52a84834 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 14:33:04 +0800 Subject: [PATCH 10/24] =?UTF-8?q?feat:=E6=94=AF=E6=8C=81=E9=80=9A=E4=B9=89?= =?UTF-8?q?=E5=8D=83=E9=97=AE=EF=BC=8C=E4=BC=98=E5=8C=96=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 ++++- adapters/adapter_factory.py | 10 ++- adapters/azure.py | 3 +- adapters/base.py | 150 +++++++++++++++++++++++++++++++---- adapters/claude.py | 87 ++++++-------------- adapters/claude_web.py | 50 +----------- adapters/gemini_adapter.py | 60 +++----------- adapters/protocol.py | 8 +- adapters/proxy.py | 3 +- adapters/qwen.py | 129 ++++++++++++++++++++++++++++++ adapters/xunfei_spark.py | 67 +++++----------- adapters/zhipu_api.py | 85 +++++++------------- clients/claude_web_client.py | 2 + model-config.template | 8 ++ open-api.py | 32 +++++--- tests/test.py | 72 +++++++++++------ utils/http_util.py | 63 --------------- 17 files changed, 459 insertions(+), 390 deletions(-) create mode 100644 adapters/qwen.py delete mode 100644 utils/http_util.py diff --git a/README.md b/README.md index 4550f3e..8b23270 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,19 @@ - [ ] 百度文心一言 - [x] 讯飞星火 - [x] gemini + - [x] 通义千问 - [ ] ... 
- [x] 支持stream方式调用 - [x] 支持open ai的第三方代理服务,比如openai-sb等 - [x] 支持在线更新配置 `http://0.0.0.0:8090/`(这个前端页面和交互完全是用gpt写的 哈哈) - [x] 支持负载均衡,一个key可轮训/随机/并行等访问多个模型 - [x] 支持按照model_name进行路由 -- [ ] 错误和异常处理优化,对齐openai errorcode + +**更新日志** + +2024-04-03 +- 支持通义千问 +- 优化异常处理 ## 快速开始 @@ -188,10 +194,18 @@ "cookie": "xxxxx", "style": "balanced" } + }, + { + "token":"qwen-111111xxxx", + "type":"qwen", + "config":{ + "api_key":"sk-xxxxxxxx", + "model":"qwen-turbo" + } } ] ## 项目部分代码来自于以下开源项目,感谢🙏 -https://github.com/vsakkas/sydney.py -https://github.com/suqingdong/sparkapi \ No newline at end of file + - https://github.com/vsakkas/sydney.py + - https://github.com/suqingdong/sparkapi \ No newline at end of file diff --git a/adapters/adapter_factory.py b/adapters/adapter_factory.py index 957b48e..e83a3f4 100644 --- a/adapters/adapter_factory.py +++ b/adapters/adapter_factory.py @@ -1,6 +1,6 @@ from loguru import logger from adapters.azure import AzureAdapter -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, invalid_request_error from adapters.claude import ClaudeModel from adapters.claude_web import ClaudeWebModel from adapters.proxy import ProxyAdapter @@ -10,14 +10,14 @@ from adapters.model_name_router_adapter import ModelNameRouterAdapter from adapters.gemini_adapter import GeminiAdapter from adapters.bing_sydney import BingSydneyModel - +from adapters.qwen import QWenAdapter model_instance_dict = {} def get_adapter(instanceKey: str): model = model_instance_dict.get(instanceKey) if model is None: - raise Exception("model not found") + raise invalid_request_error("model not found") return model @@ -51,10 +51,12 @@ def init_adapter(instanceKey: str, type: str, **kwargs) -> ModelAdapter: model = GeminiAdapter(**kwargs) elif type == "bing-sydney": model = BingSydneyModel(**kwargs) + elif type == "qwen": + model = QWenAdapter(**kwargs) else: raise ValueError(f"unknown model type: {type}") except Exception as e: - logger.error(f"init model failed {instanceKey},{type},{kwargs}: {e}") + logger.exception(f"init model failed {instanceKey},{type},{kwargs}: {e}") if model is not None: model_instance_dict[instanceKey] = model return model diff --git a/adapters/azure.py b/adapters/azure.py index a5fa123..9d1f905 100644 --- a/adapters/azure.py +++ b/adapters/azure.py @@ -2,11 +2,10 @@ import json from typing import Iterator, Union -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, post, stream from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse import requests from loguru import logger -from utils.http_util import post, stream class AzureAdapter(ModelAdapter): diff --git a/adapters/base.py b/adapters/base.py index e17a1a3..d1bca62 100644 --- a/adapters/base.py +++ b/adapters/base.py @@ -1,7 +1,106 @@ +import json import time from typing import Union, Iterator + +from openai import OpenAIError +import requests from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse from utils.util import num_tokens_from_string +from loguru import logger + + +api_timeout_seconds = 300 + +""" +http: +status_code:429 + +body: +{ + "error": { + "message": "You exceeded your current quota, please check your plan and billing details. 
For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.", + "type": "insufficient_quota", + "param": null, + "code": "insufficient_quota" + } +} + +https://platform.openai.com/docs/guides/error-codes/api-errors +""" + + +class UDFApiError(OpenAIError): + def __init__(self, message, status: int = 500, code="server_error"): + super(UDFApiError, self).__init__(message) + self.http_status = status + self._message = message + + +def authentication_error(): + return UDFApiError("Invalid Authentication", 401, "") + + +def rate_limit_error(message): + return UDFApiError(message, 429, "") + + +def serverError(message): + return UDFApiError(message, 500) + + +def invalid_request_error(message): + return UDFApiError(message, 400) + + +def resp_text(resp): + resp_str = None + if resp is not None: + resp_str = f"status_code:{resp.status_code}: {resp.text}" + return resp_str + + +def post( + api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None +): + resp = None + try: + resp = requests.post( + url=api_url, + headers=headers, + data=json.dumps(params), + timeout=timeout, + proxies=proxies, + ) + if requests.codes.ok != resp.status_code: + raise UDFApiError(resp.text, resp.status_code) + return json.loads(resp.text) + finally: + logger.debug( + f"【http.post】 请求url:{api_url}, headers:{headers}, params:{params}, resp:{resp_text(resp)}" + ) + + +def stream( + api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None +): + resp = None + try: + resp = requests.post( + api_url, + stream=True, + headers=headers, + json=params, + # data=json.dumps(params), + timeout=timeout, + proxies=proxies, + ) + if requests.codes.ok != resp.status_code: + raise UDFApiError(resp.text, resp.status_code) + return resp + finally: + logger.debug( + f"【http.stream】 请求url:{api_url}, headers:{headers}, params:{params}, resp:{resp_text(resp)}" + ) class ModelAdapter: @@ -17,17 +116,27 @@ def chat_completions( """ pass - def completion_to_openai_response(self, completion: str, model: str = "default"): - completion_tokens = num_tokens_from_string(completion) + # completion 转 openai_response + def completion_to_openai_response( + self, completion: str, model: str = "default", **kargs + ): + completion_tokens = kargs.get("completion_tokens") + if completion_tokens is None: + completion_tokens = num_tokens_from_string(completion) + prompt_tokens = kargs.get("prompt_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + id = kargs.get("id", f"chatcmpl-{str(time.time())}") + finish_reason = kargs.get("finish_reason", "stop") + created = kargs.get("created", int(time.time())) openai_response = { - "id": f"chatcmpl-{str(time.time())}", + "id": id, "object": "chat.completion", - "created": int(time.time()), + "created": created, "model": model, "usage": { - "prompt_tokens": 0, + "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, + "total_tokens": total_tokens, }, "choices": [ { @@ -36,25 +145,36 @@ def completion_to_openai_response(self, completion: str, model: str = "default") "content": completion, }, "index": 0, - "finish_reason": "stop", + "finish_reason": finish_reason, } ], } return openai_response + + + # stream下 每次的completion 转 openai_stream_response def completion_to_openai_stream_response( - self, completion: str, model: str = "default" + self, completion: str, model: str = "default", index = 0, **kargs ): - completion_tokens = num_tokens_from_string(completion) 
+ completion_tokens = kargs.get("completion_tokens") + if completion_tokens is None: + completion_tokens = num_tokens_from_string(completion) + prompt_tokens = kargs.get("prompt_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + id = kargs.get("id", f"chatcmpl-{str(time.time())}") + finish_reason = kargs.get("finish_reason", "stop") + created = kargs.get("created", int(time.time())) + index = kargs.get("index", 0) openai_response = { - "id": f"chatcmpl-{str(time.time())}", + "id": id, "object": "chat.completion.chunk", - "created": int(time.time()), + "created": created, "model": model, "usage": { - "prompt_tokens": 0, + "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, + "total_tokens": total_tokens, }, "choices": [ { @@ -62,8 +182,8 @@ def completion_to_openai_stream_response( "role": "assistant", "content": completion, }, - "index": 0, - "finish_reason": "stop", + "index": index, + "finish_reason": finish_reason, } ], } diff --git a/adapters/claude.py b/adapters/claude.py index b58d8a4..338cf3d 100644 --- a/adapters/claude.py +++ b/adapters/claude.py @@ -1,12 +1,11 @@ import json from typing import Iterator import requests -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, post, stream from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse from loguru import logger from utils.util import num_tokens_from_string import time -from utils.http_util import post, stream # 默认的model映射,不过request中的model参数会被config覆盖 model_map = { @@ -39,10 +38,12 @@ def __init__(self, **kwargs): self.model = kwargs.pop("model", None) self.config_args = kwargs - def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: - ''' + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: + """ https://docs.anthropic.com/claude/reference/getting-started-with-the-api - ''' + """ openai_params = request.model_dump_json() claude_params = self.openai_to_claude_params(openai_params) url = "https://api.anthropic.com/v1/complete" @@ -56,7 +57,7 @@ def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompl response = stream(url, headers, claude_params) for chunk in response.iter_lines(chunk_size=1024): # 移除头部data: 字符 - decoded_line = chunk.decode('utf-8') + decoded_line = chunk.decode("utf-8") logger.info(f"decoded_line: {decoded_line}") decoded_line = decoded_line.lstrip("data:").strip() json_line = json.loads(decoded_line) @@ -72,10 +73,8 @@ def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompl else: completion = json_line.get("completion") if completion: - openai_response = ( - self.claude_to_chatgpt_response_stream( - decoded_line - ) + openai_response = self.claude_to_chatgpt_response_stream( + decoded_line ) if openai_response: yield ChatCompletionResponse(**openai_response) @@ -123,58 +122,22 @@ def openai_to_claude_params(self, openai_params): def claude_to_chatgpt_response_stream(self, claude_response): completion = claude_response.get("completion", "") - completion_tokens = num_tokens_from_string(completion) - openai_response = { - "id": f"chatcmpl-{str(time.time())}", - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": "gpt-3.5-turbo-0613", - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "delta": { - "role": "assistant", - "content": completion, - }, 
- "index": 0, - "finish_reason": stop_reason_map[claude_response.get("stop_reason")] - if claude_response.get("stop_reason") - else None, - } - ], - } - return openai_response + finish_reason = ( + stop_reason_map[claude_response.get("stop_reason")] + if claude_response.get("stop_reason") + else None + ) + return self.completion_to_openai_stream_response( + completion, self.model, finish_reason=finish_reason + ) def claude_to_chatgpt_response(self, claude_response): - completion_tokens = num_tokens_from_string( - claude_response.get("completion", "") + completion = claude_response.get("completion", "") + finish_reason = ( + stop_reason_map[claude_response.get("stop_reason")] + if claude_response.get("stop_reason") + else None + ) + return self.completion_to_openai_response( + completion, self.model, finish_reason=finish_reason ) - openai_response = { - "id": f"chatcmpl-{str(time.time())}", - "object": "chat.completion", - "created": int(time.time()), - "model": self.model, - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": claude_response.get("completion", ""), - }, - "index": 0, - "finish_reason": stop_reason_map[claude_response.get("stop_reason")] - if claude_response.get("stop_reason") - else None, - } - ], - } - - return openai_response diff --git a/adapters/claude_web.py b/adapters/claude_web.py index 8c6b687..be3ad55 100644 --- a/adapters/claude_web.py +++ b/adapters/claude_web.py @@ -54,53 +54,7 @@ def chat_completions( yield ChatCompletionResponse(**resp) def claude_to_openai_stream_response(self, completion: str): - completion_tokens = num_tokens_from_string(completion) - openai_response = { - "id": f"chatcmpl-{str(time.time())}", - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": "claude-2", - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "delta": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - - return openai_response + return self.completion_to_openai_stream_response(completion) def claude_to_openai_response(self, completion: str): - completion_tokens = num_tokens_from_string(completion) - openai_response = { - "id": f"chatcmpl-{str(time.time())}", - "object": "chat.completion", - "created": int(time.time()), - "model": "claude-2", - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - - return openai_response + return self.completion_to_openai_response(completion) diff --git a/adapters/gemini_adapter.py b/adapters/gemini_adapter.py index d2fbe68..b090884 100644 --- a/adapters/gemini_adapter.py +++ b/adapters/gemini_adapter.py @@ -2,14 +2,16 @@ import time from typing import Dict, Iterator, List import uuid -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, post from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse, ChatMessage -import requests -from utils.http_util import post, stream -from loguru import logger from utils.util import num_tokens_from_string """ + +https://ai.google.dev/tutorials/rest_quickstart + + + curl -x http://127.0.0.1:7890 
https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key= \ -H 'Content-Type: application/json' \ -X POST \ @@ -117,55 +119,11 @@ def chat_completions( def response_convert_stream(self, data): completion = data["candidates"][0]["content"]["parts"][0]["text"] - completion_tokens = num_tokens_from_string(completion) - openai_response = { - "id": str(uuid.uuid1()), - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": self.model, - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "delta": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - return openai_response + return self.completion_to_openai_stream_response(completion, self.model) def response_convert(self, data): completion = data["candidates"][0]["content"]["parts"][0]["text"] - completion_tokens = num_tokens_from_string(completion) - openai_response = { - "id": str(uuid.uuid1()), - "object": "chat.completion", - "created": int(time.time()), - "model": self.model, - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - return openai_response + return self.completion_to_openai_response(completion, self.model) """ [ @@ -189,7 +147,7 @@ def convert_messages_to_prompt( role = message.role if role in ["function"]: raise Exception(f"不支持的功能:{role}") - if role == "system": # 将system转为user 这里可以使用 CharacterGLM + if role == "system": # 将system转为user role = "user" content = self.prompt.format(system=message.content) prompt.append({"role": role, "parts": [{"text": content}]}) diff --git a/adapters/protocol.py b/adapters/protocol.py index b0d3f16..c05936b 100644 --- a/adapters/protocol.py +++ b/adapters/protocol.py @@ -43,13 +43,15 @@ class ChatCompletionRequest(BaseModel): class ChatCompletionResponseChoice(BaseModel): index: int message: ChatMessage - finish_reason: Literal["stop", "length", "function_call"] + # finish_reason: Literal["stop", "length", "function_call"] + finish_reason: Optional[str] class ChatCompletionResponseStreamChoice(BaseModel): index: int delta: DeltaMessage - finish_reason: Optional[Literal["stop", "length"]] + # finish_reason: Optional[Literal["stop", "length"]] + finish_reason: Optional[str] class Usage(BaseModel): @@ -67,3 +69,5 @@ class ChatCompletionResponse(BaseModel): ] created: Optional[int] = Field(default_factory=lambda: int(time.time())) usage: Optional[Usage] = None + + diff --git a/adapters/proxy.py b/adapters/proxy.py index cbbd109..6ae2885 100644 --- a/adapters/proxy.py +++ b/adapters/proxy.py @@ -1,9 +1,8 @@ import json from typing import Iterator -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, stream, post from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse from loguru import logger -from utils.http_util import stream, post class ProxyAdapter(ModelAdapter): diff --git a/adapters/qwen.py b/adapters/qwen.py new file mode 100644 index 0000000..866447b --- /dev/null +++ b/adapters/qwen.py @@ -0,0 +1,129 @@ +import copy +import json +from typing import Iterator +from adapters.base import ModelAdapter, serverError, post, stream +from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse +from loguru import logger + + +class 
QWenAdapter(ModelAdapter): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.api_key = kwargs.pop("api_key") + self.model = kwargs.pop("model") + self.config_args = kwargs + self.url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation" + + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: + + data = self.openai_req_2_qw_req(request) + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + if request.stream: + headers["X-DashScope-SSE"] = "enable" + response = stream(self.url, headers, params=data) + index = 0 + error = False + last_output = None + for chunk in response.iter_lines(chunk_size=1024): + # 移除头部data: 字符 + decoded_line = chunk.decode("utf-8") + logger.info(f"decoded_line: {decoded_line}") + if decoded_line.startswith("id"): + index = int(decoded_line.lstrip("id:").strip()) + if decoded_line.startswith("event:error"): + error = True + if not decoded_line.startswith("data:"): + continue + decoded_line = decoded_line.lstrip("data:").strip() + if error: + raise serverError(decoded_line) + output = json.loads(decoded_line) + openai_resp = self.qw_resp_2_openai_resp_stream( + self.qw_stream_output_handle(output, last_output), index + ) + last_output = output + yield ChatCompletionResponse(**openai_resp) + + else: + response = post(self.url, headers=headers, params=data) + + yield ChatCompletionResponse(**self.qw_resp_2_openai_resp(response)) + + def qw_stream_output_handle(self, output: dict, last_output: dict): + prompt_tokens = output["usage"]["input_tokens"] + completion_tokens = output["usage"]["output_tokens"] + content = output["output"]["choices"][0]["message"]["content"] + try: + last_prompt_tokens = last_output["usage"]["input_tokens"] + last_completion_tokens = last_output["usage"]["output_tokens"] + last_content = last_output["output"]["choices"][0]["message"]["content"] + except: + last_prompt_tokens = 0 + last_completion_tokens = 0 + last_content = "" + new_output = copy.deepcopy(output) + new_output["usage"]["input_tokens"] = prompt_tokens - last_prompt_tokens + new_output["usage"]["output_tokens"] = completion_tokens - last_completion_tokens + new_output["output"]["choices"][0]["message"]["content"] = content[ + len(last_content) : + ] + return new_output + + def qw_resp_2_openai_resp_stream(self, response: dict, index: int) -> dict: + id = response["request_id"] + prompt_tokens = response["usage"]["input_tokens"] + completion_tokens = response["usage"]["output_tokens"] + content = response["output"]["choices"][0]["message"]["content"] + finish_reason = response["output"]["choices"][0]["finish_reason"] + if finish_reason == "null": + finish_reason = None + return self.completion_to_openai_stream_response( + content, + self.model, + index, + finish_reason=finish_reason, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + id=id, + ) + + def qw_resp_2_openai_resp(self, response: dict) -> dict: + id = response["request_id"] + prompt_tokens = response["usage"]["input_tokens"] + completion_tokens = response["usage"]["output_tokens"] + content = response["output"]["choices"][0]["message"]["content"] + return self.completion_to_openai_response( + content, + model=self.model, + id=id, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + + def openai_req_2_qw_req(self, request: ChatCompletionRequest) -> dict: + d = {} + if self.model: + d["model"] = self.model + else: + d["model"] = 
request.model + if len(self.config_args) > 0: + d["parameters"] = self.config_args + else: + parameters = {} + if request.temperature: + parameters["temperature"] = request.temperature + if request.top_p: + parameters["top_p"] = request.top_p + d["parameters"] = parameters + d["parameters"]["result_format"] = "message" + d["input"] = {} + d["input"]["messages"] = [ + m.model_dump(exclude_none=True) for m in request.messages + ] + return d diff --git a/adapters/xunfei_spark.py b/adapters/xunfei_spark.py index b1e0396..59637a7 100644 --- a/adapters/xunfei_spark.py +++ b/adapters/xunfei_spark.py @@ -77,31 +77,18 @@ def client_response_2_chatgpt_response_stream(self, resp_json): prompt_tokens = usage["prompt_tokens"] completion_tokens = usage["completion_tokens"] total_tokens = usage["total_tokens"] - - openai_response = { - "id": resp_json["header"]["sid"], - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": "gpt-3.5-turbo-0613", - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, - "choices": [ - { - "delta": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": "stop" - if resp_json["payload"]["choices"]["status"] == 2 - else None, - } - ], - } - return openai_response + id = resp_json["header"]["sid"] + finish_reason = ( + "stop" if resp_json["payload"]["choices"]["status"] == 2 else None + ) + return self.completion_to_openai_stream_response( + completion, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + id=id, + finish_reason=finish_reason, + ) def client_response_to_chatgpt_response(self, iter_resp): completions = [] @@ -124,25 +111,11 @@ def client_response_to_chatgpt_response(self, iter_resp): prompt_tokens = usage["prompt_tokens"] completion_tokens = usage["completion_tokens"] total_tokens = usage["total_tokens"] - openai_response = { - "id": id, - "object": "chat.completion", - "created": int(time.time()), - "model": "gpt-3.5-turbo-0613", - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": "".join(completions), - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - return openai_response + content = "".join(completions) + return self.completion_to_openai_response( + content, + prompt_tokens=prompt_tokens, + completion_toke=completion_tokens, + total_tokens=total_tokens, + id=id, + ) diff --git a/adapters/zhipu_api.py b/adapters/zhipu_api.py index 900f327..7d26e65 100644 --- a/adapters/zhipu_api.py +++ b/adapters/zhipu_api.py @@ -1,7 +1,5 @@ - - from typing import Dict, Iterator, List -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, post, stream from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse, ChatMessage import time @@ -11,7 +9,6 @@ from utils.util import num_tokens_from_string from utils.sse_client import SSEClient -from utils.http_util import post, stream API_TOKEN_TTL_SECONDS = 3 * 60 @@ -55,14 +52,17 @@ def __init__(self, **kwargs): self.api_key = kwargs.pop("api_key", None) self.model = kwargs.pop("model", None) self.prompt = kwargs.pop( - "prompt", "You need to follow the system settings:{system}") + "prompt", "You need to follow the system settings:{system}" + ) self.config_args = kwargs - def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: - 
''' + def chat_completions( + self, request: ChatCompletionRequest + ) -> Iterator[ChatCompletionResponse]: + """ https://open.bigmodel.cn/dev/api#http https://open.bigmodel.cn/dev/api#sdk - ''' + """ # 发起post请求 model = self.model if self.model else request.model invoke_method = "sse-invoke" if request.stream else "invoke" @@ -74,7 +74,9 @@ def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompl event_data = SSEClient(data) for event in event_data.events(): logger.debug(f"chat_completions event: {event}") - yield ChatCompletionResponse(**self.convert_response_stream(event, model)) + yield ChatCompletionResponse( + **self.convert_response_stream(event, model) + ) else: global headers headers.update({"Authorization": token}) @@ -85,55 +87,26 @@ def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompl def convert_response(self, resp, model): resp = resp["data"] req_id = resp["request_id"] - openai_response = { - "id": f"chatcmpl-{req_id}", - "object": "chat.completion", - "created": int(time.time()), - "model": model, - "usage": { - "prompt_tokens": resp["usage"]["prompt_tokens"], - "completion_tokens": resp["usage"]["completion_tokens"], - "total_tokens": resp["usage"]["total_tokens"], - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": resp["choices"][0]["content"], - }, - "index": 0, - "finish_reason": "stop", - } - ], - } - return openai_response + prompt_tokens = resp["usage"]["prompt_tokens"] + completion_tokens = resp["usage"]["completion_tokens"] + content = resp["choices"][0]["content"] + return self.completion_to_openai_response( + content, + model=model, + id=req_id, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) def convert_response_stream(self, event_data, model): completion = event_data.data - completion_tokens = num_tokens_from_string(completion) finish_reason = "stop" if event_data.event == "finish" else None - openai_response = { - "id": f"chatcmpl-{event_data.id}", - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": model, - "usage": { - "prompt_tokens": 0, - "completion_tokens": completion_tokens, - "total_tokens": completion_tokens, - }, - "choices": [ - { - "delta": { - "role": "assistant", - "content": completion, - }, - "index": 0, - "finish_reason": finish_reason, - } - ], - } - return openai_response + return self.completion_to_openai_stream_response( + completion, + model, + finish_reason=finish_reason, + id=f"chatcmpl-{event_data.id}", + ) def convert_params(self, request: ChatCompletionRequest) -> Dict: """ @@ -153,7 +126,9 @@ def convert_params(self, request: ChatCompletionRequest) -> Dict: params["top_p"] = top_p return params - def convert_messages_to_prompt(self, messages: List[ChatMessage]) -> List[Dict[str, str]]: + def convert_messages_to_prompt( + self, messages: List[ChatMessage] + ) -> List[Dict[str, str]]: prompt = [] for message in messages: role = message.role diff --git a/clients/claude_web_client.py b/clients/claude_web_client.py index 8213011..653be58 100644 --- a/clients/claude_web_client.py +++ b/clients/claude_web_client.py @@ -4,6 +4,7 @@ import requests as req from curl_cffi import requests import re +from loguru import logger # 参考 https://github.com/KoushikNavuluri/Claude-API @@ -32,6 +33,7 @@ def get_organization_id(self): response = requests.get(url, headers=headers, impersonate="chrome110", proxies=self.proxies) + logger.debug(response.status_code, response.text) res = json.loads(response.text) uuid = res[0]['uuid'] 
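With the refactor above, adapters no longer hand-build the OpenAI response dicts; they delegate to the base-class helpers `completion_to_openai_response` and `completion_to_openai_stream_response`, overriding fields such as `finish_reason`, `id` or token counts via keyword arguments. A minimal sketch of how a new adapter plugs into those helpers — the `EchoAdapter` below is a hypothetical example for illustration, not part of this patch:

```
from typing import Iterator

from adapters.base import ModelAdapter
from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse


class EchoAdapter(ModelAdapter):
    """Hypothetical adapter that echoes the last user message in OpenAI format."""

    def __init__(self, **kwargs):
        super().__init__()
        self.model = kwargs.pop("model", "echo")

    def chat_completions(
        self, request: ChatCompletionRequest
    ) -> Iterator[ChatCompletionResponse]:
        completion = request.messages[-1].content
        if request.stream:
            # One chunk per yield; keyword args override the helper defaults.
            yield ChatCompletionResponse(
                **self.completion_to_openai_stream_response(
                    completion, self.model, finish_reason="stop"
                )
            )
        else:
            yield ChatCompletionResponse(
                **self.completion_to_openai_response(completion, self.model)
            )
```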
diff --git a/model-config.template b/model-config.template index 972ac6a..f5be67b 100644 --- a/model-config.template +++ b/model-config.template @@ -103,5 +103,13 @@ "cookie": "xxxxx", "style": "balanced" } + }, + { + "token":"qwen-111111xxxx", + "type":"qwen", + "config":{ + "api_key":"sk-xxxxxxxx", + "model":"qwen-turbo" + } } ] \ No newline at end of file diff --git a/open-api.py b/open-api.py index 24c09f7..59176f4 100644 --- a/open-api.py +++ b/open-api.py @@ -5,7 +5,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from fastapi.routing import APIRouter from pydantic import BaseModel -from adapters.base import ModelAdapter +from adapters.base import ModelAdapter, UDFApiError, serverError from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse from typing import Iterator, List, Optional from adapters.adapter_factory import get_adapter @@ -86,9 +86,11 @@ def check_admin_token( ) -def convert(resp: Iterator[ChatCompletionResponse]): - for response in resp: - yield f"data: {response.model_dump_json(exclude_none=True)}\n\n" +def convert(first_resp: ChatCompletionResponse, resp: Iterator[ChatCompletionResponse]): + yield f"data: {first_resp.model_dump_json(exclude_none=True)}\n\n" + yield from ( + f"data: {response.model_dump_json(exclude_none=True)}\n\n" for response in resp + ) yield "data: [DONE]\n\n" @@ -103,12 +105,22 @@ def create_chat_completion( request: ChatCompletionRequest, model: ModelAdapter = Depends(check_api_key) ): logger.info(f"request: {request}, model: {model}") - resp = model.chat_completions(request) - if request.stream: - return StreamingResponse(convert(resp), media_type="text/event-stream") - else: - openai_response = next(resp) - return JSONResponse(content=openai_response.model_dump(exclude_none=True)) + try: + resp = model.chat_completions(request) + if request.stream: + # 为了让生成器中的异常,在这里被捕获,StreamingResponse中会吞掉异常 + first_respose = next(resp) + return StreamingResponse( + convert(first_respose, resp), media_type="text/event-stream" + ) + else: + openai_response = next(resp) + return JSONResponse(content=openai_response.model_dump(exclude_none=True)) + except UDFApiError as ue: + return JSONResponse(content=ue._message, status_code=ue.http_status) + except Exception as e: + logger.exception(e) + return JSONResponse(content=str(e), status_code=500) @router.get("/verify") diff --git a/tests/test.py b/tests/test.py index 47f6115..c56a319 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,49 +1,69 @@ +import traceback import openai import sys import json import time +from loguru import logger + -openai.api_base = "http://localhost:8090/v1" def single_message_test(**kwargs): print(f"----------single message test {kwargs}----------") - completion = openai.ChatCompletion.create( - model="azure-gpt-35-turbo", - messages=[{"role": "user", "content": "你好"}], - **kwargs, - ) - if kwargs.get("stream"): - for chunk in completion: - print(json.dumps(chunk, ensure_ascii=False)) - else: - print(json.dumps(completion, ensure_ascii=False)) + try: + completion = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "你好"}], + **kwargs, + ) + if kwargs.get("stream"): + for chunk in completion: + print(json.dumps(chunk, ensure_ascii=False)) + else: + print(json.dumps(completion, ensure_ascii=False)) + except openai.APIError as e: + #Handle API error here, e.g. 
retry or log + print(f"OpenAI API returned an API Error: {e}") + pass + except Exception as e: + s = traceback.format_exc() + print(s) + logger.error(f"An unexpected error occurred: {e}") + pass def multiple_messages_test(**kwargs): print(f"----------multiple messages test {kwargs}----------") - completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "user", "content": "你好"}, - {"role": "assistant", "content": "你好!很高兴认识你。"}, - {"role": "user", "content": "你是谁?"}, - ], - **kwargs, - ) - if kwargs.get("stream"): - for chunk in completion: - print(json.dumps(chunk, ensure_ascii=False)) - else: - print(json.dumps(completion, ensure_ascii=False)) + try: + completion = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "你是一个旅行专家, 能够帮我们制定旅行计划"}, + {"role": "user", "content": "你好"}, + {"role": "assistant", "content": "你好!很高兴认识你。"}, + {"role": "user", "content": "你是谁?"}, + ], + **kwargs, + ) + if kwargs.get("stream"): + for chunk in completion: + print(json.dumps(chunk, ensure_ascii=False)) + else: + print(json.dumps(completion, ensure_ascii=False)) + except Exception as e: + s = traceback.format_exc() + print(s) + logger.error(f"Exception: {e}") + if __name__ == "__main__": api_key = sys.argv[1] + openai.api_base = "http://localhost:8090/v1" openai.api_key = api_key # single_message_test() time.sleep(2) - # single_message_test(stream=True) + single_message_test(stream=True) # time.sleep(2) multiple_messages_test() time.sleep(2) diff --git a/utils/http_util.py b/utils/http_util.py deleted file mode 100644 index d1dfd6d..0000000 --- a/utils/http_util.py +++ /dev/null @@ -1,63 +0,0 @@ -import requests -import json -from loguru import logger - -api_timeout_seconds = 300 - - -def post( - api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None -): - resp = None - try: - resp = requests.post( - url=api_url, - headers=headers, - data=json.dumps(params), - timeout=timeout, - proxies=proxies, - ) - if requests.codes.ok != resp.status_code: - logger.error(f"响应异常:{resp_text(resp)}") - raise Exception("响应异常:" + resp_text(resp)) - return json.loads(resp.text) - except Exception as e: - logger.exception("请求异常", e) - raise e - finally: - logger.debug( - f"【http.post】 请求url:{api_url}, headers:{headers}, params:{params}, resp:{resp_text(resp)}" - ) - - -def resp_text(resp): - resp_str = None - if resp: - resp_str = f"status_code:{resp.status_code}: {resp.text}" - return resp_str - - -def stream( - api_url, headers: dict, params: dict, timeout=api_timeout_seconds, proxies=None -): - resp = None - try: - resp = requests.post( - api_url, - stream=True, - headers=headers, - json=params, - # data=json.dumps(params), - timeout=timeout, - proxies=proxies, - ) - if requests.codes.ok != resp.status_code: - logger.error(f"响应异常:{resp.text}") - raise Exception("请求异常") - return resp - except Exception as e: - logger.exception("请求异常", e) - finally: - logger.debug( - f"【http.stream】 请求url:{api_url}, headers:{headers}, params:{params}" - ) From 4b7b72c32f147ae681d7b80d749618c112217f09 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 14:35:40 +0800 Subject: [PATCH 11/24] update md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8b23270..e158773 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # openai-style-api - ***欢迎有兴趣的大佬提PR*** + ***本人精力有限,某些模型更新可能无法及时更新,如果遇到问题请提issue,也欢迎有兴趣的大佬提PR*** ## 用途 屏蔽不同大模型API的差异,统一用openai api标准格式使用大模型, 
也可以用来做api-key的二次分发管理; 配置化管理不同大模型调用参数,让你在使用大模型的时候只需关注 api-key 和 messages From 766fc327d9ded780bf6201a9d7e7d68fce9a2ce2 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 18:50:23 +0800 Subject: [PATCH 12/24] =?UTF-8?q?=E4=BC=98=E5=8C=96docker=20=E9=83=A8?= =?UTF-8?q?=E7=BD=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 1 + Dockerfile | 4 +- README.md | 68 +++++++++++++------ config.py | 4 +- docker-compose.yml | 8 ++- ...nfig.template => model-config-default.json | 0 open-api.py | 4 +- tests/test.py | 2 +- 8 files changed, 64 insertions(+), 27 deletions(-) rename model-config.template => model-config-default.json (100%) diff --git a/.dockerignore b/.dockerignore index f37357f..1791458 100644 --- a/.dockerignore +++ b/.dockerignore @@ -8,3 +8,4 @@ Dockerfile **/__pycache__ *.pyc *.log +model-config.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 12d4dfd..fddff1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,4 +12,6 @@ RUN pip install --no-cache -r requirements.txt COPY . ${BUILD_PREFIX}/ -# ENTRYPOINT [ "python", "open-ai.py] \ No newline at end of file +EXPOSE 8090 + +CMD ["python", "open-api.py"] \ No newline at end of file diff --git a/README.md b/README.md index e158773..5f796fb 100644 --- a/README.md +++ b/README.md @@ -34,25 +34,55 @@ - 优化异常处理 -## 快速开始 +## 部署方式 +**项目的核心配置依赖model-config.json文件,若是没有model-config.json,默认会使用model-config-default.json启动,这时虽然能启动起来,但是因为api-key等没有配置,无法调用成功。** + +### Docker + +本地新建一个model-config.json文件,根据下边配置文件示例,进行配置, 然后运行以下命令 + + docker pull tianminghui/openai-style-api + + docker run -d -p 8090:8090 \ + -e ADMIN-TOKEN=admin \ + -v /path/to/your/model-config.json:/app/model-config.json \ + tianminghui/openai-style-api + +`/path/to/your/model-config.json` 替换成你自己的本地路径 + +### Docker compose +clone本项目,或者下载项目中的`docker-compose.yml`文件,修改其中的`/path/to/your/model-config.json`, 然后运行以下命令 + + docker-compose up -d + + +### 本地部署 1. `git clone https://github.com/tian-minghui/openai-style-api.git` 拉取项目代码 -2. `cp model-config.template model-config.json` 并按需修改配置文件model-config.json - - { - "token": "f2b7295fc440db7f", - "type": "azure", - "config": { - "api_base": "https://xxxx.openai.azure.com/", - "deployment_id": "xxxx", - "api_version": "2023-05-15", - "api_key": "xxxx", - "temperature": 0.8 - } - } +2. `cp model-config-default.json model-config.json` 并按需修改配置文件model-config.json +3. `pip install -r requirements.txt` +4. 运行 `python open-api.py` -4. 本地化部署直接 `pip install -r requirements.txt` 后,运行 `python open-api.py`, docker部署在目录下执行 `docker compose up -d` -5. 
有了api-base: localhost:8090 和 api-key:f2b7295fc440db7f 可以使用了,下边列举了几种使用方式 + +## 配置说明 +model-config.json 配置文件简单示例 + +``` + [{ + "token": "f2b7295fc440db7f", + "type": "openai", // openai + "config": { + "api_base": "https://api.openai.com/v1/", + "api_key": "sk-xxxxxx", + "model": "gpt-3.5-turbo" + "temperature": 0.8 + } + }] +``` +- 整个文件是一个json list,可以配置多个模型,只要token不重复就行 +- token 自定义的token,后续在请求的时候拿着这个token来请求 +- type 类型,表示以下config中的配置是那个模型的,比如 openai,通义千问 +- config, 配置openai的api_base, api_key, model等, 针对不用模型有不同的配置(下边有配置示例,更详细配置可以看代码), 此处的配置优先于客户端请求中的配置,比如"temperature": 0.8, 会覆盖请求中的temperature(这里的想法是可以针对同一个模型,调整不同参数,映射成一个新模型) ## 使用方式 @@ -179,7 +209,7 @@ }, { "token": "gemini-7c7aa4a3549f5", - "type": "gemini", + "type": "gemini", // gemini "config": { "api_key": "xxxxx", "proxies": { @@ -188,7 +218,7 @@ } }, { - "token": "bing-7c7aa4a3549f5", + "token": "bing-7c7aa4a3549f5", // 必应 "type": "bing-sydney", "config": { "cookie": "xxxxx", @@ -196,7 +226,7 @@ } }, { - "token":"qwen-111111xxxx", + "token":"qwen-111111xxxx", // 通义千问 "type":"qwen", "config":{ "api_key":"sk-xxxxxxxx", diff --git a/config.py b/config.py index 39a1142..71e3b9d 100644 --- a/config.py +++ b/config.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from loguru import logger from adapters.adapter_factory import init_adapter, clear_adapters - +import os class ModelConfig(BaseModel): token: str @@ -14,6 +14,8 @@ class ModelConfig(BaseModel): token_2_modelconfig: Dict[str, ModelConfig] = dict() config_path = "model-config.json" +if not os.path.exists(config_path): + config_path = "model-config-default.json" def load_model_config(): diff --git a/docker-compose.yml b/docker-compose.yml index d3ae43e..16cfb00 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,11 @@ version: "1" services: - openai-key: - build: . 
- command: python open-api.py + openai-style-api: + image: tianminghui/openai-style-api + container_name: openai-style-api ports: - "8090:8090" environment: - ADMIN-TOKEN=admin + volumes: + - /path/to/your/model-config.json:/app/model-config.json \ No newline at end of file diff --git a/model-config.template b/model-config-default.json similarity index 100% rename from model-config.template rename to model-config-default.json diff --git a/open-api.py b/open-api.py index 59176f4..3c2e612 100644 --- a/open-api.py +++ b/open-api.py @@ -1,4 +1,4 @@ -import json +import uuid from fastapi import FastAPI, Depends, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse @@ -21,7 +21,7 @@ from fastapi.staticfiles import StaticFiles router = APIRouter() -admin_token = "admin" +admin_token = uuid.uuid1() def create_app(): diff --git a/tests/test.py b/tests/test.py index c56a319..b786f1b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -65,6 +65,6 @@ def multiple_messages_test(**kwargs): time.sleep(2) single_message_test(stream=True) # time.sleep(2) - multiple_messages_test() + # multiple_messages_test() time.sleep(2) # multiple_messages_test(stream=True) From 1a16d00287c911c15f11a969c74abc54486dfa88 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 18:57:46 +0800 Subject: [PATCH 13/24] Create docker-publish.yml --- .github/workflows/docker-publish.yml | 52 ++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 .github/workflows/docker-publish.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..ceac487 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,52 @@ +name: Publish Docker image + +on: + workflow_dispatch: + release: + types: [published] + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - + name: Check out the repo + uses: actions/checkout@v3 + - + name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - + name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: tianminghui/openai-style-api + tags: | + type=raw,value=latest + type=ref,event=tag + + - + name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - + name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + From 70a74cffd50230fe05ea7c5125273ab14e0fa9b3 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 19:09:31 +0800 Subject: [PATCH 14/24] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index ceac487..af5f0db 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,18 +26,6 @@ jobs: uses: docker/metadata-action@v4 with: images: tianminghui/openai-style-api - tags: | - type=raw,value=latest - type=ref,event=tag - - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Build and push Docker image uses: docker/build-push-action@v4 From e89a990c0f3851073cbebf0c52ef739d770a64d2 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 19:23:02 +0800 Subject: [PATCH 15/24] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index af5f0db..5eb43c4 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -31,10 +31,7 @@ jobs: uses: docker/build-push-action@v4 with: context: . - platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max From 30118804a741c1f4b0c8ee09e847fb8b84fa47ec Mon Sep 17 00:00:00 2001 From: tianminghui Date: Wed, 3 Apr 2024 22:10:12 +0800 Subject: [PATCH 16/24] fix --- README.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 5f796fb..3cee85d 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ docker pull tianminghui/openai-style-api - docker run -d -p 8090:8090 \ + docker run -d -p 8090:8090 --name openai-style-api\ -e ADMIN-TOKEN=admin \ -v /path/to/your/model-config.json:/app/model-config.json \ tianminghui/openai-style-api @@ -68,16 +68,19 @@ clone本项目,或者下载项目中的`docker-compose.yml`文件,修改其 model-config.json 配置文件简单示例 ``` - [{ - "token": "f2b7295fc440db7f", - "type": "openai", // openai - "config": { - "api_base": "https://api.openai.com/v1/", - "api_key": "sk-xxxxxx", - "model": "gpt-3.5-turbo" - "temperature": 0.8 + [ + { + "token": "f2b7295fc440db7f", + "type": "azure", // azure openai 模型 + "config": { + "api_base": "https://xxxx.openai.azure.com/", + "deployment_id": "gpt-35-turbo", + "api_version": "2023-05-15", + "api_key": "xxxxxx", + "temperature": 0.8 + } } - }] + ] ``` - 整个文件是一个json list,可以配置多个模型,只要token不重复就行 - token 自定义的token,后续在请求的时候拿着这个token来请求 From c36cf8e82cc11c2649b9edc61bfc5a97a68678a3 Mon Sep 17 00:00:00 2001 From: iamsk Date: Thu, 4 Apr 2024 17:42:25 +0800 Subject: [PATCH 17/24] skylark support --- adapters/adapter_factory.py | 3 +++ adapters/skylark.py | 49 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 adapters/skylark.py diff --git a/adapters/adapter_factory.py b/adapters/adapter_factory.py index e83a3f4..c9d8139 100644 --- a/adapters/adapter_factory.py +++ b/adapters/adapter_factory.py @@ -11,6 +11,7 @@ from adapters.gemini_adapter import GeminiAdapter from 
adapters.bing_sydney import BingSydneyModel from adapters.qwen import QWenAdapter +from adapters.skylark import SkylarkAdapter model_instance_dict = {} @@ -53,6 +54,8 @@ def init_adapter(instanceKey: str, type: str, **kwargs) -> ModelAdapter: model = BingSydneyModel(**kwargs) elif type == "qwen": model = QWenAdapter(**kwargs) + elif type == "skylark": + model = SkylarkAdapter(**kwargs) else: raise ValueError(f"unknown model type: {type}") except Exception as e: diff --git a/adapters/skylark.py b/adapters/skylark.py new file mode 100644 index 0000000..1478625 --- /dev/null +++ b/adapters/skylark.py @@ -0,0 +1,49 @@ +from typing import Iterator +from adapters.base import ModelAdapter +from adapters.protocol import ChatCompletionRequest, ChatCompletionResponse +from volcengine.maas import MaasService + + +class SkylarkAdapter(ModelAdapter): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.model = kwargs.pop("model") + self.config_args = kwargs + self.maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing') + api_key = kwargs.pop("api_key") + ak, sk = api_key.split(":") + self.maas.set_ak(ak) + self.maas.set_sk(sk) + + def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: + data = self.openai_req_2_sl_req(request) + resp = self.maas.chat(data) + yield ChatCompletionResponse(**self.sl_resp_2_openai_resp(resp)) + + def sl_resp_2_openai_resp(self, response: dict) -> dict: + id = response["req_id"] + prompt_tokens = response["usage"]["prompt_tokens"] + completion_tokens = response["usage"]["completion_tokens"] + content = response["choice"]["message"]["content"] + return self.completion_to_openai_response( + content, + model=self.model, + id=id, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + + def openai_req_2_sl_req(self, request: ChatCompletionRequest) -> dict: + req = { + "model": { + "name": self.model, + }, + "parameters": { + "max_new_tokens": request.max_length or self.config_args.get('max_length'), + "temperature": request.temperature or self.config_args.get('temperature') + }, + "messages": [ + m.model_dump(exclude_none=True) for m in request.messages + ] + } + return req From 02e294dc8fa026502b8b5fa6e88b978d76825ad9 Mon Sep 17 00:00:00 2001 From: iamsk Date: Thu, 4 Apr 2024 20:28:30 +0800 Subject: [PATCH 18/24] stream for skylark --- adapters/skylark.py | 20 ++++++++++++++++++-- tests/test.py | 5 +---- tests/test_skylark.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 tests/test_skylark.py diff --git a/adapters/skylark.py b/adapters/skylark.py index 1478625..a2b9c6d 100644 --- a/adapters/skylark.py +++ b/adapters/skylark.py @@ -17,8 +17,13 @@ def __init__(self, **kwargs): def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompletionResponse]: data = self.openai_req_2_sl_req(request) - resp = self.maas.chat(data) - yield ChatCompletionResponse(**self.sl_resp_2_openai_resp(resp)) + if request.stream: + resps = self.maas.stream_chat(data) + for resp in resps: + yield ChatCompletionResponse(**self.sl_resp_2_openai_resp_stream(resp)) + else: + resp = self.maas.chat(data) + yield ChatCompletionResponse(**self.sl_resp_2_openai_resp(resp)) def sl_resp_2_openai_resp(self, response: dict) -> dict: id = response["req_id"] @@ -33,6 +38,17 @@ def sl_resp_2_openai_resp(self, response: dict) -> dict: completion_tokens=completion_tokens, ) + def sl_resp_2_openai_resp_stream(self, response: dict) -> dict: 
+ id = response["req_id"] + content = response["choice"]["message"]["content"] + return self.completion_to_openai_response( + content, + model=self.model, + id=id, + prompt_tokens=0, + completion_tokens=0, + ) + def openai_req_2_sl_req(self, request: ChatCompletionRequest) -> dict: req = { "model": { diff --git a/tests/test.py b/tests/test.py index b786f1b..ce23b4f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -6,8 +6,6 @@ from loguru import logger - - def single_message_test(**kwargs): print(f"----------single message test {kwargs}----------") try: @@ -22,7 +20,7 @@ def single_message_test(**kwargs): else: print(json.dumps(completion, ensure_ascii=False)) except openai.APIError as e: - #Handle API error here, e.g. retry or log + # Handle API error here, e.g. retry or log print(f"OpenAI API returned an API Error: {e}") pass except Exception as e: @@ -56,7 +54,6 @@ def multiple_messages_test(**kwargs): logger.error(f"Exception: {e}") - if __name__ == "__main__": api_key = sys.argv[1] openai.api_base = "http://localhost:8090/v1" diff --git a/tests/test_skylark.py b/tests/test_skylark.py new file mode 100644 index 0000000..f8374c7 --- /dev/null +++ b/tests/test_skylark.py @@ -0,0 +1,37 @@ +import traceback +import openai +import sys +import json +from loguru import logger + + +def single_message_test(**kwargs): + print(f"----------single message test {kwargs}----------") + try: + completion = openai.ChatCompletion.create( + model="Skylark2-pro-32k", + messages=[{"role": "user", "content": "what is the capital of China?"}], + **kwargs, + ) + if kwargs.get("stream"): + for chunk in completion: + print(json.dumps(chunk, ensure_ascii=False)) + else: + print(json.dumps(completion, ensure_ascii=False)) + except openai.APIError as e: + # Handle API error here, e.g. 
retry or log + print(f"OpenAI API returned an API Error: {e}") + pass + except Exception as e: + s = traceback.format_exc() + print(s) + logger.error(f"An unexpected error occurred: {e}") + pass + + +if __name__ == "__main__": + api_key = sys.argv[1] + openai.api_base = "http://localhost:8090/v1" + openai.api_key = api_key + # single_message_test() + single_message_test(stream=True) From a302746080f95f6726dfc0e7b0e442c2cd745902 Mon Sep 17 00:00:00 2001 From: iamsk Date: Thu, 4 Apr 2024 20:49:41 +0800 Subject: [PATCH 19/24] to chunk --- adapters/skylark.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/adapters/skylark.py b/adapters/skylark.py index a2b9c6d..f7fb74c 100644 --- a/adapters/skylark.py +++ b/adapters/skylark.py @@ -19,8 +19,10 @@ def chat_completions(self, request: ChatCompletionRequest) -> Iterator[ChatCompl data = self.openai_req_2_sl_req(request) if request.stream: resps = self.maas.stream_chat(data) + index = 0 for resp in resps: - yield ChatCompletionResponse(**self.sl_resp_2_openai_resp_stream(resp)) + yield ChatCompletionResponse(**self.sl_resp_2_openai_resp_stream(resp, index)) + index += 1 else: resp = self.maas.chat(data) yield ChatCompletionResponse(**self.sl_resp_2_openai_resp(resp)) @@ -38,15 +40,15 @@ def sl_resp_2_openai_resp(self, response: dict) -> dict: completion_tokens=completion_tokens, ) - def sl_resp_2_openai_resp_stream(self, response: dict) -> dict: + def sl_resp_2_openai_resp_stream(self, response: dict, index: int) -> dict: id = response["req_id"] content = response["choice"]["message"]["content"] - return self.completion_to_openai_response( + return self.completion_to_openai_stream_response( content, - model=self.model, + self.model, + index, + finish_reason="stop" if not content else None, id=id, - prompt_tokens=0, - completion_tokens=0, ) def openai_req_2_sl_req(self, request: ChatCompletionRequest) -> dict: From 672dcc6288f746161aa3eb76b73b9ca690b7ce61 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Thu, 18 Apr 2024 20:47:20 +0800 Subject: [PATCH 20/24] kimi --- model-config-default.json | 9 +++++++++ tests/test.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/model-config-default.json b/model-config-default.json index f5be67b..dca5157 100644 --- a/model-config-default.json +++ b/model-config-default.json @@ -111,5 +111,14 @@ "api_key":"sk-xxxxxxxx", "model":"qwen-turbo" } + }, + { + "token": "kimi-GxqT3BlbkFJj1", + "type": "openai", + "config": { + "api_base": "https://api.moonshot.cn/v1/", + "api_key": "sk-xxxxxx", + "model": "moonshot-v1-8k" + } } ] \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index b786f1b..daa23eb 100644 --- a/tests/test.py +++ b/tests/test.py @@ -61,7 +61,7 @@ def multiple_messages_test(**kwargs): api_key = sys.argv[1] openai.api_base = "http://localhost:8090/v1" openai.api_key = api_key - # single_message_test() + single_message_test() time.sleep(2) single_message_test(stream=True) # time.sleep(2) From 7ea1354f3d9d7b7099ac9c877444b87b877469c9 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Thu, 18 Apr 2024 20:52:43 +0800 Subject: [PATCH 21/24] kimi --- README.md | 12 +++++++++++- docker-compose.yml | 2 +- tests/test.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3cee85d..e9bb7fb 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ - [x] claude-api 【api申请在等待列表,暂未测试】 - [x] claude-web (将web端功能封装成openai api) - [x] 智谱ai + - [x] kimi - [x] bingchat(copilot) - [ ] 百度文心一言 - [x] 讯飞星火 @@ -52,7 
+53,7 @@ `/path/to/your/model-config.json` 替换成你自己的本地路径 ### Docker compose -clone本项目,或者下载项目中的`docker-compose.yml`文件,修改其中的`/path/to/your/model-config.json`, 然后运行以下命令 +clone本项目,或者下载项目中的`docker-compose.yml`文件,修改其中的`./model-config.json`路径, 然后运行以下命令 docker-compose up -d @@ -235,6 +236,15 @@ model-config.json 配置文件简单示例 "api_key":"sk-xxxxxxxx", "model":"qwen-turbo" } + }, + { + "token": "kimi-GxqT3BlbkFJj1", // kimi + "type": "openai", // kimi api与openai相同,因此使用openai就可以 + "config": { + "api_base": "https://api.moonshot.cn/v1/", + "api_key": "sk-xxxxxx", + "model": "moonshot-v1-8k" + } } ] diff --git a/docker-compose.yml b/docker-compose.yml index 16cfb00..b53fd02 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,4 +8,4 @@ services: environment: - ADMIN-TOKEN=admin volumes: - - /path/to/your/model-config.json:/app/model-config.json \ No newline at end of file + - ./model-config.json:/app/model-config.json \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index daa23eb..b786f1b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -61,7 +61,7 @@ def multiple_messages_test(**kwargs): api_key = sys.argv[1] openai.api_base = "http://localhost:8090/v1" openai.api_key = api_key - single_message_test() + # single_message_test() time.sleep(2) single_message_test(stream=True) # time.sleep(2) From 4313dae9588d9c2fa5c6d1e30fbd4b68e94bd11a Mon Sep 17 00:00:00 2001 From: tianminghui Date: Fri, 19 Apr 2024 14:06:49 +0800 Subject: [PATCH 22/24] openai>=1.0.0 --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index e9bb7fb..dec8deb 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,8 @@ model-config.json 配置文件简单示例 ### openai库调用 +openai<1.0.0 使用如下方式 + import openai openai.api_key = "f2b7295fc440db7f" @@ -119,6 +121,30 @@ model-config.json 配置文件简单示例 model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}]) print(completion.choices[0].message.content) + +openai>=1.0.0使用以下方式调用 + + import os + from openai import OpenAI + + client = OpenAI( + # This is the default and can be omitted + api_key='kimi-GxqT3BlbkFJj', + base_url = 'http://localhost:8090/v1' + ) + + chat_completion = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ) + + print(chat_completion.choices[0].message.content) + ### 第三方应用 [ChatGPT Next Web](https://github.com/Yidadaa/ChatGPT-Next-Web) From 5ed42ce720befdaae00ebec7134620ac550026a5 Mon Sep 17 00:00:00 2001 From: tianminghui Date: Fri, 19 Apr 2024 14:25:11 +0800 Subject: [PATCH 23/24] =?UTF-8?q?feat:=E5=8D=87=E7=BA=A7openai=E5=88=B0?= =?UTF-8?q?=E6=9C=80=E6=96=B0=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 6 ++++-- tests/test.py | 48 +++++++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/requirements.txt b/requirements.txt index b89b243..80b9ae8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,19 +12,21 @@ charset-normalizer==3.3.0 click==8.1.7 curl-cffi==0.5.9 dataclasses==0.6 +distro==1.9.0 exceptiongroup==1.1.3 fastapi==0.103.2 frozenlist==1.4.0 gevent==23.9.1 greenlet==3.0.0 h11==0.14.0 -httpcore==0.18.0 +httpcore==1.0.5 httptools==0.6.0 +httpx==0.27.0 idna==3.4 iniconfig==2.0.0 loguru==0.7.2 multidict==6.0.4 -openai==0.28.1 +openai==1.23.1 packaging==23.2 pluggy==1.4.0 pycparser==2.21 diff --git a/tests/test.py b/tests/test.py index b786f1b..4f18809 
100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,5 +1,8 @@ import traceback import openai +from openai import OpenAI + +client = None import sys import json import time @@ -11,16 +14,14 @@ def single_message_test(**kwargs): print(f"----------single message test {kwargs}----------") try: - completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "你好"}], - **kwargs, - ) + completion = client.chat.completions.create(model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "你好"}], + **kwargs) if kwargs.get("stream"): for chunk in completion: - print(json.dumps(chunk, ensure_ascii=False)) + print(chunk) else: - print(json.dumps(completion, ensure_ascii=False)) + print(completion) except openai.APIError as e: #Handle API error here, e.g. retry or log print(f"OpenAI API returned an API Error: {e}") @@ -35,21 +36,19 @@ def single_message_test(**kwargs): def multiple_messages_test(**kwargs): print(f"----------multiple messages test {kwargs}----------") try: - completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "system", "content": "你是一个旅行专家, 能够帮我们制定旅行计划"}, - {"role": "user", "content": "你好"}, - {"role": "assistant", "content": "你好!很高兴认识你。"}, - {"role": "user", "content": "你是谁?"}, - ], - **kwargs, - ) + completion = client.chat.completions.create(model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "你是一个旅行专家, 能够帮我们制定旅行计划"}, + {"role": "user", "content": "你好"}, + {"role": "assistant", "content": "你好!很高兴认识你。"}, + {"role": "user", "content": "你是谁?"}, + ], + **kwargs) if kwargs.get("stream"): for chunk in completion: - print(json.dumps(chunk, ensure_ascii=False)) + print(chunk) else: - print(json.dumps(completion, ensure_ascii=False)) + print(completion) except Exception as e: s = traceback.format_exc() print(s) @@ -59,12 +58,11 @@ def multiple_messages_test(**kwargs): if __name__ == "__main__": api_key = sys.argv[1] - openai.api_base = "http://localhost:8090/v1" - openai.api_key = api_key + client = OpenAI(api_key=api_key, base_url="http://localhost:8090/v1") # single_message_test() - time.sleep(2) - single_message_test(stream=True) # time.sleep(2) - # multiple_messages_test() + # single_message_test(stream=True) + # time.sleep(2) + multiple_messages_test() time.sleep(2) - # multiple_messages_test(stream=True) + multiple_messages_test(stream=True) From 2bfb84a500f8c3447a3abaa3e8c7edf28fdec3c6 Mon Sep 17 00:00:00 2001 From: iamsk Date: Thu, 25 Apr 2024 22:42:15 +0800 Subject: [PATCH 24/24] add volcengine requirements add demo config for skylark --- model-config-default.json | 12 +++++++++++- requirements.txt | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/model-config-default.json b/model-config-default.json index f5be67b..c71c387 100644 --- a/model-config-default.json +++ b/model-config-default.json @@ -111,5 +111,15 @@ "api_key":"sk-xxxxxxxx", "model":"qwen-turbo" } + }, + { + "token": "sl-7c7aa4a3549f5", + "type": "skylark", + "config": { + "api_key": "AKxxxxxx", + "model": "Skylark2-pro-32k", + "temperature": 0.01, + "max_length": 1999 + } } -] \ No newline at end of file +] diff --git a/requirements.txt b/requirements.txt index b89b243..ee6989b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,3 +53,4 @@ websockets==11.0.3 yarl==1.9.2 zope.event==5.0 zope.interface==6.1 +volcengine==1.0.138
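
Usage note for the last few patches in this series: PATCH 17/18 add the streaming skylark adapter, PATCH 23 moves the project to openai>=1.0.0, and PATCH 24 ships a demo skylark entry in model-config-default.json. A minimal sketch of how those pieces fit together is below; it assumes the demo token `sl-7c7aa4a3549f5` has been copied into a local model-config.json with real volcengine AK/SK credentials and that the proxy from open-api.py is listening on port 8090 — the token, base URL, and model name are illustrative placeholders, not tested values.

```python
# Minimal sketch: streaming a chat completion through the proxy with the
# new-style openai>=1.0.0 client, using the skylark demo token as the key.
# The token "sl-7c7aa4a3549f5" and base_url are assumptions taken from the
# demo config in this patch series; replace them with your own values.
from openai import OpenAI

client = OpenAI(
    api_key="sl-7c7aa4a3549f5",           # token configured for the skylark adapter
    base_url="http://localhost:8090/v1",  # proxy started by `python open-api.py`
)

# The adapter config supplies the real backend model, so the model name here
# only needs to be accepted by the proxy; this mirrors the project's tests.
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what is the capital of China?"}],
    stream=True,
)
for chunk in stream:
    print(chunk)
```

Because the adapter yields one OpenAI-style chunk per upstream skylark event, the loop above prints incremental responses as they arrive; dropping `stream=True` returns a single aggregated completion instead.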