refactor(webserver): switch to openai chat interface (TabbyML#2564)
* refactor(webserver): switch to openai chat interface

* fix query get content

* update utoipa path

* fix test

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
wsxiaoys and autofix-ci[bot] authored Jul 3, 2024
1 parent 4ce404e commit 64cc7f4
Showing 22 changed files with 199 additions and 602 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -64,6 +64,7 @@ mime_guess = "2.0.4"
 assert_matches = "1.5"
 insta = "1.34.0"
 logkit = "0.3"
+async-openai = "0.20"
 
 [workspace.dependencies.uuid]
 version = "1.3.3"
3 changes: 1 addition & 2 deletions crates/http-api-bindings/Cargo.toml
@@ -7,7 +7,6 @@ homepage.workspace = true
 
 [dependencies]
 anyhow.workspace = true
-async-openai = "0.20"
 async-stream.workspace = true
 async-trait.workspace = true
 futures.workspace = true
@@ -18,7 +17,7 @@ serde_json = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 tabby-inference = { path = "../tabby-inference" }
 ollama-api-bindings = { path = "../ollama-api-bindings" }
-tracing.workspace = true
+async-openai.workspace = true
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["rt", "macros"] }
17 changes: 5 additions & 12 deletions crates/http-api-bindings/src/chat/mod.rs
@@ -1,20 +1,13 @@
-mod openai_chat;
-
 use std::sync::Arc;
 
-use openai_chat::OpenAIChatEngine;
+use async_openai::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::ChatCompletionStream;
 
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
-    match model.kind.as_str() {
-        "openai/chat" => Arc::new(OpenAIChatEngine::create(
-            &model.api_endpoint,
-            model.model_name.as_deref().unwrap_or_default(),
-            model.api_key.clone(),
-        )),
-        "ollama/chat" => ollama_api_bindings::create_chat(model).await,
+    let config = OpenAIConfig::default()
+        .with_api_base(model.api_endpoint.clone())
+        .with_api_key(model.api_key.clone().unwrap_or_default());
 
-        unsupported_kind => panic!("Unsupported kind for http chat: {}", unsupported_kind),
-    }
+    Arc::new(async_openai::Client::with_config(config))
 }
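
Note: engines created here are now plain async-openai clients, so callers drive them through async-openai's own request types. A minimal sketch of such a caller, assuming async-openai 0.20; the helper name, model name, and prompt are illustrative, not code from this commit:

    use async_openai::types::{
        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
    };
    use tabby_inference::ChatCompletionStream;

    // Hypothetical caller of the engine returned by `create` above.
    async fn ask(engine: &dyn ChatCompletionStream) -> anyhow::Result<String> {
        let request = CreateChatCompletionRequestArgs::default()
            .model("gpt-4o-mini") // placeholder model name
            .messages(vec![ChatCompletionRequestUserMessageArgs::default()
                .content("Explain the borrow checker in one sentence.")
                .build()?
                .into()])
            .build()?;

        // `get()` exposes async-openai's `Chat` API surface.
        let response = engine.get().create(request).await?;
        Ok(response
            .choices
            .first()
            .and_then(|c| c.message.content.clone())
            .unwrap_or_default())
    }
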
91 changes: 0 additions & 91 deletions crates/http-api-bindings/src/chat/openai_chat.rs

This file was deleted.

1 change: 1 addition & 0 deletions crates/llama-cpp-server/Cargo.toml
@@ -24,6 +24,7 @@ anyhow.workspace = true
 which = "6"
 serde.workspace = true
 serdeconv.workspace = true
+async-openai.workspace = true
 
 [build-dependencies]
 cmake = "0.1"
17 changes: 4 additions & 13 deletions crates/llama-cpp-server/src/lib.rs
@@ -3,18 +3,16 @@ mod supervisor;
 use std::{path::PathBuf, sync::Arc};
 
 use anyhow::Result;
+use async_openai::config::OpenAIConfig;
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use serde::Deserialize;
 use supervisor::LlamaCppSupervisor;
 use tabby_common::{
-    api::chat::Message,
     config::{HttpModelConfigBuilder, LocalModelConfig, ModelConfig},
     registry::{parse_model_id, ModelRegistry, GGML_MODEL_RELATIVE_PATH},
 };
-use tabby_inference::{
-    ChatCompletionOptions, ChatCompletionStream, CompletionOptions, CompletionStream, Embedding,
-};
+use tabby_inference::{ChatCompletionStream, CompletionOptions, CompletionStream, Embedding};
 
 fn api_endpoint(port: u16) -> String {
     format!("http://127.0.0.1:{port}")
@@ -141,16 +139,9 @@ impl ChatCompletionServer {
     }
 }
 
-#[async_trait]
 impl ChatCompletionStream for ChatCompletionServer {
-    async fn chat_completion(
-        &self,
-        messages: &[Message],
-        options: ChatCompletionOptions,
-    ) -> Result<BoxStream<String>> {
-        self.chat_completion
-            .chat_completion(messages, options)
-            .await
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig> {
+        self.chat_completion.get()
     }
 }

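`ChatCompletionServer` now only hands back async-openai's `Chat` handle from whichever client it wraps. A sketch of how such an inner client could be pointed at the spawned llama.cpp server; the helper name and the `/v1` suffix are assumptions, not this commit's code:

    use std::sync::Arc;

    use async_openai::config::OpenAIConfig;
    use tabby_inference::ChatCompletionStream;

    // Hypothetical helper: wrap the spawned llama.cpp server's local port in an
    // async-openai client so `ChatCompletionServer::get` can forward to it.
    fn make_local_chat_client(port: u16) -> Arc<dyn ChatCompletionStream> {
        let config = OpenAIConfig::default()
            .with_api_base(format!("http://127.0.0.1:{port}/v1"))
            .with_api_key(""); // the local server requires no key
        Arc::new(async_openai::Client::with_config(config))
    }
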
93 changes: 0 additions & 93 deletions crates/ollama-api-bindings/src/chat.rs

This file was deleted.

3 changes: 0 additions & 3 deletions crates/ollama-api-bindings/src/lib.rs
@@ -1,8 +1,5 @@
 mod model;
 
-mod chat;
-pub use chat::create as create_chat;
-
 mod completion;
 pub use completion::create as create_completion;

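The dedicated Ollama chat binding can go away presumably because Ollama also exposes an OpenAI-compatible chat endpoint, so chat traffic flows through the same generic client. A sketch of that configuration; the endpoint shown is Ollama's default, and the whole helper is an assumption rather than part of this commit:

    use async_openai::config::OpenAIConfig;

    // Hypothetical: reach a local Ollama instance through its OpenAI-compatible
    // endpoint rather than the removed ollama chat binding. Default port assumed;
    // Ollama ignores the API key.
    fn ollama_chat_client() -> async_openai::Client<OpenAIConfig> {
        let config = OpenAIConfig::default()
            .with_api_base("http://localhost:11434/v1")
            .with_api_key("");
        async_openai::Client::with_config(config)
    }
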
11 changes: 0 additions & 11 deletions crates/tabby-common/src/api/mod.rs
@@ -2,14 +2,3 @@ pub mod code;
 pub mod doc;
 pub mod event;
 pub mod server_setting;
-
-pub mod chat {
-    use serde::{Deserialize, Serialize};
-    use utoipa::ToSchema;
-
-    #[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
-    pub struct Message {
-        pub role: String,
-        pub content: String,
-    }
-}
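
The hand-rolled `Message { role, content }` type is replaced by async-openai's typed request/response messages. A rough equivalent of building a system + user exchange with the new types (contents are placeholders):

    use async_openai::error::OpenAIError;
    use async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs,
        ChatCompletionRequestUserMessageArgs,
    };

    // Roughly what a `[Message { role, content }]` slice becomes with the
    // typed request messages; contents are placeholders.
    fn example_messages() -> Result<Vec<ChatCompletionRequestMessage>, OpenAIError> {
        Ok(vec![
            ChatCompletionRequestSystemMessageArgs::default()
                .content("You are a helpful coding assistant.")
                .build()?
                .into(),
            ChatCompletionRequestUserMessageArgs::default()
                .content("Write hello world in Rust.")
                .build()?
                .into(),
        ])
    }
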
1 change: 1 addition & 0 deletions crates/tabby-inference/Cargo.toml
@@ -16,3 +16,4 @@ derive_builder = "0.12.0"
 futures = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 trie-rs = "0.1.1"
+async-openai.workspace = true
32 changes: 7 additions & 25 deletions crates/tabby-inference/src/chat.rs
@@ -1,29 +1,11 @@
-use anyhow::Result;
-use async_trait::async_trait;
-use derive_builder::Builder;
-use futures::stream::BoxStream;
-use tabby_common::api::chat::Message;
+use async_openai::config::OpenAIConfig;
 
-#[derive(Builder, Debug)]
-pub struct ChatCompletionOptions {
-    #[builder(default = "0.1")]
-    pub sampling_temperature: f32,
-
-    #[builder(default = "crate::default_seed()")]
-    pub seed: u64,
-
-    #[builder(default = "1920")]
-    pub max_decoding_tokens: i32,
-
-    #[builder(default = "0.0")]
-    pub presence_penalty: f32,
+pub trait ChatCompletionStream: Sync + Send {
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig>;
 }
 
-#[async_trait]
-pub trait ChatCompletionStream: Sync + Send {
-    async fn chat_completion(
-        &self,
-        messages: &[Message],
-        options: ChatCompletionOptions,
-    ) -> Result<BoxStream<String>>;
+impl ChatCompletionStream for async_openai::Client<OpenAIConfig> {
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig> {
+        self.chat()
+    }
 }
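
The trait is now just an accessor for async-openai's `Chat` API, with a blanket implementation for `Client<OpenAIConfig>`, so consumers get streaming from async-openai directly instead of the old `BoxStream<String>`. A hedged sketch of streaming through the new interface, assuming async-openai 0.20; model name and prompt are placeholders:

    use async_openai::types::{
        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
    };
    use futures::StreamExt;
    use tabby_inference::ChatCompletionStream;

    // Stream a reply from any `ChatCompletionStream` implementor and print the
    // tokens as they arrive; error handling kept minimal for the sketch.
    async fn stream_reply(engine: &dyn ChatCompletionStream) -> anyhow::Result<()> {
        let request = CreateChatCompletionRequestArgs::default()
            .model("local-model") // placeholder
            .messages(vec![ChatCompletionRequestUserMessageArgs::default()
                .content("Summarize this diff.")
                .build()?
                .into()])
            .stream(true)
            .build()?;

        let mut stream = engine.get().create_stream(request).await?;
        while let Some(chunk) = stream.next().await {
            for choice in chunk?.choices {
                if let Some(delta) = choice.delta.content {
                    print!("{delta}");
                }
            }
        }
        Ok(())
    }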