refactor(webserver): switch to openai chat interface (TabbyML#2564)
* refactor(webserver): switch to openai chat interface

* fix query get content

* update utoipa path

* fix test

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
wsxiaoys and autofix-ci[bot] authored Jul 3, 2024
1 parent 4ce404e commit 64cc7f4
Showing 22 changed files with 199 additions and 602 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -64,6 +64,7 @@ mime_guess = "2.0.4"
 assert_matches = "1.5"
 insta = "1.34.0"
 logkit = "0.3"
+async-openai = "0.20"
 
 [workspace.dependencies.uuid]
 version = "1.3.3"
3 changes: 1 addition & 2 deletions crates/http-api-bindings/Cargo.toml
@@ -7,7 +7,6 @@ homepage.workspace = true
 
 [dependencies]
 anyhow.workspace = true
-async-openai = "0.20"
 async-stream.workspace = true
 async-trait.workspace = true
 futures.workspace = true
@@ -18,7 +17,7 @@ serde_json = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 tabby-inference = { path = "../tabby-inference" }
 ollama-api-bindings = { path = "../ollama-api-bindings" }
-tracing.workspace = true
+async-openai.workspace = true
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["rt", "macros"] }
17 changes: 5 additions & 12 deletions crates/http-api-bindings/src/chat/mod.rs
@@ -1,20 +1,13 @@
-mod openai_chat;
-
 use std::sync::Arc;
 
-use openai_chat::OpenAIChatEngine;
+use async_openai::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::ChatCompletionStream;
 
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
-    match model.kind.as_str() {
-        "openai/chat" => Arc::new(OpenAIChatEngine::create(
-            &model.api_endpoint,
-            model.model_name.as_deref().unwrap_or_default(),
-            model.api_key.clone(),
-        )),
-        "ollama/chat" => ollama_api_bindings::create_chat(model).await,
+    let config = OpenAIConfig::default()
+        .with_api_base(model.api_endpoint.clone())
+        .with_api_key(model.api_key.clone().unwrap_or_default());
 
-        unsupported_kind => panic!("Unsupported kind for http chat: {}", unsupported_kind),
-    }
+    Arc::new(async_openai::Client::with_config(config))
 }
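
Note: engines created here are now plain async-openai clients, so callers drive them through async-openai's own request types. A minimal sketch of such a caller, assuming async-openai 0.20; the helper name, model name, and prompt are illustrative, not code from this commit:

    use async_openai::types::{
        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
    };
    use tabby_inference::ChatCompletionStream;

    // Hypothetical caller of the engine returned by `create` above.
    async fn ask(engine: &dyn ChatCompletionStream) -> anyhow::Result<String> {
        let request = CreateChatCompletionRequestArgs::default()
            .model("gpt-4o-mini") // placeholder model name
            .messages(vec![ChatCompletionRequestUserMessageArgs::default()
                .content("Explain the borrow checker in one sentence.")
                .build()?
                .into()])
            .build()?;

        // `get()` exposes async-openai's `Chat` API surface.
        let response = engine.get().create(request).await?;
        Ok(response
            .choices
            .first()
            .and_then(|c| c.message.content.clone())
            .unwrap_or_default())
    }
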
91 changes: 0 additions & 91 deletions crates/http-api-bindings/src/chat/openai_chat.rs

This file was deleted.

1 change: 1 addition & 0 deletions crates/llama-cpp-server/Cargo.toml
@@ -24,6 +24,7 @@ anyhow.workspace = true
 which = "6"
 serde.workspace = true
 serdeconv.workspace = true
+async-openai.workspace = true
 
 [build-dependencies]
 cmake = "0.1"
17 changes: 4 additions & 13 deletions crates/llama-cpp-server/src/lib.rs
@@ -3,18 +3,16 @@ mod supervisor;
 use std::{path::PathBuf, sync::Arc};
 
 use anyhow::Result;
+use async_openai::config::OpenAIConfig;
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use serde::Deserialize;
 use supervisor::LlamaCppSupervisor;
 use tabby_common::{
-    api::chat::Message,
     config::{HttpModelConfigBuilder, LocalModelConfig, ModelConfig},
     registry::{parse_model_id, ModelRegistry, GGML_MODEL_RELATIVE_PATH},
 };
-use tabby_inference::{
-    ChatCompletionOptions, ChatCompletionStream, CompletionOptions, CompletionStream, Embedding,
-};
+use tabby_inference::{ChatCompletionStream, CompletionOptions, CompletionStream, Embedding};
 
 fn api_endpoint(port: u16) -> String {
     format!("http://127.0.0.1:{port}")
@@ -141,16 +139,9 @@ impl ChatCompletionServer {
     }
 }
 
-#[async_trait]
 impl ChatCompletionStream for ChatCompletionServer {
-    async fn chat_completion(
-        &self,
-        messages: &[Message],
-        options: ChatCompletionOptions,
-    ) -> Result<BoxStream<String>> {
-        self.chat_completion
-            .chat_completion(messages, options)
-            .await
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig> {
+        self.chat_completion.get()
     }
 }

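`ChatCompletionServer` now only hands back async-openai's `Chat` handle from whichever client it wraps. A sketch of how such an inner client could be pointed at the spawned llama.cpp server; the helper name and the `/v1` suffix are assumptions, not this commit's code:

    use std::sync::Arc;

    use async_openai::config::OpenAIConfig;
    use tabby_inference::ChatCompletionStream;

    // Hypothetical helper: wrap the spawned llama.cpp server's local port in an
    // async-openai client so `ChatCompletionServer::get` can forward to it.
    fn make_local_chat_client(port: u16) -> Arc<dyn ChatCompletionStream> {
        let config = OpenAIConfig::default()
            .with_api_base(format!("http://127.0.0.1:{port}/v1"))
            .with_api_key(""); // the local server requires no key
        Arc::new(async_openai::Client::with_config(config))
    }
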
93 changes: 0 additions & 93 deletions crates/ollama-api-bindings/src/chat.rs

This file was deleted.

3 changes: 0 additions & 3 deletions crates/ollama-api-bindings/src/lib.rs
@@ -1,8 +1,5 @@
 mod model;
 
-mod chat;
-pub use chat::create as create_chat;
-
 mod completion;
 pub use completion::create as create_completion;

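The dedicated Ollama chat binding can go away presumably because Ollama also exposes an OpenAI-compatible chat endpoint, so chat traffic flows through the same generic client. A sketch of that configuration; the endpoint shown is Ollama's default, and the whole helper is an assumption rather than part of this commit:

    use async_openai::config::OpenAIConfig;

    // Hypothetical: reach a local Ollama instance through its OpenAI-compatible
    // endpoint rather than the removed ollama chat binding. Default port assumed;
    // Ollama ignores the API key.
    fn ollama_chat_client() -> async_openai::Client<OpenAIConfig> {
        let config = OpenAIConfig::default()
            .with_api_base("http://localhost:11434/v1")
            .with_api_key("");
        async_openai::Client::with_config(config)
    }
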
11 changes: 0 additions & 11 deletions crates/tabby-common/src/api/mod.rs
@@ -2,14 +2,3 @@ pub mod code;
 pub mod doc;
 pub mod event;
 pub mod server_setting;
-
-pub mod chat {
-    use serde::{Deserialize, Serialize};
-    use utoipa::ToSchema;
-
-    #[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
-    pub struct Message {
-        pub role: String,
-        pub content: String,
-    }
-}
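
The hand-rolled `Message { role, content }` type is replaced by async-openai's typed request/response messages. A rough equivalent of building a system + user exchange with the new types (contents are placeholders):

    use async_openai::error::OpenAIError;
    use async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs,
        ChatCompletionRequestUserMessageArgs,
    };

    // Roughly what a `[Message { role, content }]` slice becomes with the
    // typed request messages; contents are placeholders.
    fn example_messages() -> Result<Vec<ChatCompletionRequestMessage>, OpenAIError> {
        Ok(vec![
            ChatCompletionRequestSystemMessageArgs::default()
                .content("You are a helpful coding assistant.")
                .build()?
                .into(),
            ChatCompletionRequestUserMessageArgs::default()
                .content("Write hello world in Rust.")
                .build()?
                .into(),
        ])
    }
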
1 change: 1 addition & 0 deletions crates/tabby-inference/Cargo.toml
@@ -16,3 +16,4 @@ derive_builder = "0.12.0"
 futures = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 trie-rs = "0.1.1"
+async-openai.workspace = true
32 changes: 7 additions & 25 deletions crates/tabby-inference/src/chat.rs
@@ -1,29 +1,11 @@
-use anyhow::Result;
-use async_trait::async_trait;
-use derive_builder::Builder;
-use futures::stream::BoxStream;
-use tabby_common::api::chat::Message;
+use async_openai::config::OpenAIConfig;
 
-#[derive(Builder, Debug)]
-pub struct ChatCompletionOptions {
-    #[builder(default = "0.1")]
-    pub sampling_temperature: f32,
-
-    #[builder(default = "crate::default_seed()")]
-    pub seed: u64,
-
-    #[builder(default = "1920")]
-    pub max_decoding_tokens: i32,
-
-    #[builder(default = "0.0")]
-    pub presence_penalty: f32,
+pub trait ChatCompletionStream: Sync + Send {
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig>;
 }
 
-#[async_trait]
-pub trait ChatCompletionStream: Sync + Send {
-    async fn chat_completion(
-        &self,
-        messages: &[Message],
-        options: ChatCompletionOptions,
-    ) -> Result<BoxStream<String>>;
+impl ChatCompletionStream for async_openai::Client<OpenAIConfig> {
+    fn get(&self) -> async_openai::Chat<'_, OpenAIConfig> {
+        self.chat()
+    }
 }
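
The trait is now just an accessor for async-openai's `Chat` API, with a blanket implementation for `Client<OpenAIConfig>`, so consumers get streaming from async-openai directly instead of the old `BoxStream<String>`. A hedged sketch of streaming through the new interface, assuming async-openai 0.20; model name and prompt are placeholders:

    use async_openai::types::{
        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
    };
    use futures::StreamExt;
    use tabby_inference::ChatCompletionStream;

    // Stream a reply from any `ChatCompletionStream` implementor and print the
    // tokens as they arrive; error handling kept minimal for the sketch.
    async fn stream_reply(engine: &dyn ChatCompletionStream) -> anyhow::Result<()> {
        let request = CreateChatCompletionRequestArgs::default()
            .model("local-model") // placeholder
            .messages(vec![ChatCompletionRequestUserMessageArgs::default()
                .content("Summarize this diff.")
                .build()?
                .into()])
            .stream(true)
            .build()?;

        let mut stream = engine.get().create_stream(request).await?;
        while let Some(chunk) = stream.next().await {
            for choice in chunk?.choices {
                if let Some(delta) = choice.delta.content {
                    print!("{delta}");
                }
            }
        }
        Ok(())
    }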