Skip to content

Commit

Permalink
chore: update inference for some providers
Browse files Browse the repository at this point in the history
  • Loading branch information
LinkW77 authored and DynamesC committed Aug 22, 2024
1 parent c5910da commit 4f1e820
Show file tree
Hide file tree
Showing 14 changed files with 105 additions and 12 deletions.
6 changes: 6 additions & 0 deletions inference/providers/azure_openai/resources/i18n/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,11 @@ gpt_4_turbo_description: "GPT-4 Turbo model featuring improved instruction follo
gpt_4_vision_name: "GPT-4-Turbo-Vision"
gpt_4_vision_description: "GPT-4 with the ability to understand images, in addition to all other GPT-4 Turbo capabilities."

gpt_4o_name: "GPT-4o"
gpt_4o_description: "GPT-4o integrates text and images in a single model, enabling it to handle multiple data types simultaneously."

gpt_4o_mini_name: "GPT-4o-Mini"
gpt_4o_mini_description: "GPT-4o-Mini is a smaller version of GPT-4o that is only capable of handling natural language and images."

text_embedding_ada_002_name: "text-embedding-ada-002"
text_embedding_ada_002_description: "Stronger performance. It outperforms all the old embedding models on text search, code search, and sentence similarity tasks and gets comparable performance on text classification."
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ config_schemas:
- config_id: stop

pricing:
input_token: 0.0015
input_token: 0.001
output_token: 0.002
unit: 1000
currency: USD
40 changes: 40 additions & 0 deletions inference/providers/azure_openai/resources/models/gpt-4o-mini.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Azure OpenAI GPT-4o-mini chat-completion model schema.
model_schema_id: azure_openai/gpt-4o-mini
provider_model_id: gpt-4o-mini
type: chat_completion
name: "i18n:gpt_4o_mini_name"
description: "i18n:gpt_4o_mini_description"

properties:
  function_call: true
  streaming: true
  input_token_limit: 128000
  # NOTE(review): published GPT-4o-mini specs allow up to 16384 output
  # tokens; kept at 4096 pending confirmation of the Azure deployment limit.
  output_token_limit: 4096

config_schemas:
  - config_id: temperature
  - config_id: top_p
  - config_id: presence_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.1
  - config_id: frequency_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.01
  - config_id: max_tokens
    type: int
    default: 4096
    min: 1
    max: 4096
    step: 1
  - config_id: stop

# Price per `unit` (1000) tokens. Fixed: the original values (0.005/0.015)
# were copy-pasted from gpt-4o; GPT-4o-mini is priced at $0.15 input /
# $0.60 output per 1M tokens, i.e. 0.00015 / 0.0006 per 1K.
pricing:
  input_token: 0.00015
  output_token: 0.0006
  unit: 1000
  currency: USD
40 changes: 40 additions & 0 deletions inference/providers/azure_openai/resources/models/gpt-4o.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Azure OpenAI GPT-4o chat-completion model schema.
model_schema_id: azure_openai/gpt-4o
provider_model_id: gpt-4o
type: chat_completion
name: "i18n:gpt_4o_name"
description: "i18n:gpt_4o_description"

properties:
  function_call: true
  streaming: true
  input_token_limit: 128000
  output_token_limit: 4096

# User-tunable generation parameters; entries without type/bounds
# presumably inherit shared defaults defined elsewhere — confirm.
config_schemas:
  - config_id: temperature
  - config_id: top_p
  - config_id: presence_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.1
  - config_id: frequency_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    # NOTE(review): step here is 0.01 while presence_penalty uses 0.1 —
    # confirm the asymmetry is intentional.
    step: 0.01
  - config_id: max_tokens
    type: int
    default: 4096
    min: 1
    max: 4096
    step: 1
  - config_id: stop

# Price per `unit` (1000) tokens: $5 input / $15 output per 1M.
pricing:
  input_token: 0.005
  output_token: 0.015
  unit: 1000
  currency: USD
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ default_endpoint_url: https://api.cohere.com/v1/messages
properties:
function_call: false
streaming: true
input_token_limit: 8192
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ default_endpoint_url: https://api.cohere.com/v1/messages
properties:
function_call: false
streaming: true
input_token_limit: 8192
input_token_limit: 128000
output_token_limit: 128000

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 128000
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
1 change: 1 addition & 0 deletions inference/providers/cohere/resources/models/command-r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 128000
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
1 change: 1 addition & 0 deletions inference/providers/cohere/resources/models/command.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
14 changes: 7 additions & 7 deletions inference/providers/deepseek/chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ def _build_deepseek_chat_completion_payload(
if value is not None:
payload[key] = value

# if function_call:
# if function_call in ["none", "auto"]:
# payload["tool_choice"] = function_call
# else:
# payload["tool_choice"] = {"name": function_call}
# if functions:
# payload["tools"] = [{"type": "function", "function": f.model_dump()} for f in functions]
if function_call:
if function_call in ["none", "auto"]:
payload["tool_choice"] = function_call
else:
payload["tool_choice"] = {"name": function_call}
if functions:
payload["tools"] = [{"type": "function", "function": f.model_dump()} for f in functions]
logger.debug(f"_build_deepseek_chat_completion_payload: {payload}")
return payload

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description: "i18n:deepseek_chat_description"
default_endpoint_url: https://api.deepseek.com/chat/completions

properties:
function_call: false
function_call: true
streaming: true
input_token_limit: 128000
output_token_limit: 4000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description: "i18n:deepseek_coder_description"
default_endpoint_url: https://api.deepseek.com/chat/completions

properties:
function_call: false
function_call: true
streaming: true
input_token_limit: 128000
output_token_limit: 4000
Expand Down
1 change: 1 addition & 0 deletions inference/test/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ async def test_chat_completion_by_stream_and_function_call_and_length(self, test
or "openrouter" in model_schema_id
or "anthropic" in model_schema_id
or "fireworks" in model_schema_id
or "deepseek" in model_schema_id
):
pytest.skip("Skip the test case without function call or stream.")
functions = test_data["functions"]
Expand Down

0 comments on commit 4f1e820

Please sign in to comment.