Skip to content

Commit

Permalink
chore: update inference for some providers
Browse files Browse the repository at this point in the history
  • Loading branch information
LinkW77 authored and DynamesC committed Aug 22, 2024
1 parent c5910da commit 4f1e820
Show file tree
Hide file tree
Showing 14 changed files with 105 additions and 12 deletions.
6 changes: 6 additions & 0 deletions inference/providers/azure_openai/resources/i18n/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,11 @@ gpt_4_turbo_description: "GPT-4 Turbo model featuring improved instruction follo
gpt_4_vision_name: "GPT-4-Turbo-Vision"
gpt_4_vision_description: "GPT-4 with the ability to understand images, in addition to all other GPT-4 Turbo capabilities."

gpt_4o_name: "GPT-4o"
gpt_4o_description: "GPT-4o integrates text and images in a single model, enabling it to handle multiple data types simultaneously."

gpt_4o_mini_name: "GPT-4o-Mini"
gpt_4o_mini_description: "GPT-4o-Mini is a smaller version of GPT-4o that is only capable of handling natural language and images."

text_embedding_ada_002_name: "text-embedding-ada-002"
text_embedding_ada_002_description: "Stronger performance. It outperforms all the old embedding models on text search, code search, and sentence similarity tasks and gets comparable performance on text classification."
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ config_schemas:
- config_id: stop

pricing:
input_token: 0.0015
input_token: 0.001
output_token: 0.002
unit: 1000
currency: USD
40 changes: 40 additions & 0 deletions inference/providers/azure_openai/resources/models/gpt-4o-mini.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Azure OpenAI GPT-4o-mini chat-completion model schema.
model_schema_id: azure_openai/gpt-4o-mini
provider_model_id: gpt-4o-mini
type: chat_completion
name: "i18n:gpt_4o_mini_name"
description: "i18n:gpt_4o_mini_description"

properties:
  function_call: true
  streaming: true
  input_token_limit: 128000
  # NOTE(review): published GPT-4o-mini specs allow up to 16384 output
  # tokens; kept at 4096 pending confirmation of the Azure deployment limit.
  output_token_limit: 4096

config_schemas:
  - config_id: temperature
  - config_id: top_p
  - config_id: presence_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.1
  - config_id: frequency_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.01
  - config_id: max_tokens
    type: int
    default: 4096
    min: 1
    max: 4096
    step: 1
  - config_id: stop

# Price per `unit` (1000) tokens. Fixed: the original values (0.005/0.015)
# were copy-pasted from gpt-4o; GPT-4o-mini is priced at $0.15 input /
# $0.60 output per 1M tokens, i.e. 0.00015 / 0.0006 per 1K.
pricing:
  input_token: 0.00015
  output_token: 0.0006
  unit: 1000
  currency: USD
40 changes: 40 additions & 0 deletions inference/providers/azure_openai/resources/models/gpt-4o.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Azure OpenAI GPT-4o chat-completion model schema.
model_schema_id: azure_openai/gpt-4o
provider_model_id: gpt-4o
type: chat_completion
name: "i18n:gpt_4o_name"
description: "i18n:gpt_4o_description"

properties:
  function_call: true
  streaming: true
  input_token_limit: 128000
  output_token_limit: 4096

# User-tunable generation parameters; entries without type/bounds
# presumably inherit shared defaults defined elsewhere — confirm.
config_schemas:
  - config_id: temperature
  - config_id: top_p
  - config_id: presence_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    step: 0.1
  - config_id: frequency_penalty
    type: float
    default: 0.0
    min: -2.0
    max: 2.0
    # NOTE(review): step here is 0.01 while presence_penalty uses 0.1 —
    # confirm the asymmetry is intentional.
    step: 0.01
  - config_id: max_tokens
    type: int
    default: 4096
    min: 1
    max: 4096
    step: 1
  - config_id: stop

# Price per `unit` (1000) tokens: $5 input / $15 output per 1M.
pricing:
  input_token: 0.005
  output_token: 0.015
  unit: 1000
  currency: USD
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ default_endpoint_url: https://api.cohere.com/v1/messages
properties:
function_call: false
streaming: true
input_token_limit: 8192
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ default_endpoint_url: https://api.cohere.com/v1/messages
properties:
function_call: false
streaming: true
input_token_limit: 8192
input_token_limit: 128000
output_token_limit: 128000

config_schemas:
- config_id: temperature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 128000
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
1 change: 1 addition & 0 deletions inference/providers/cohere/resources/models/command-r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 128000
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
1 change: 1 addition & 0 deletions inference/providers/cohere/resources/models/command.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
function_call: false
streaming: true
input_token_limit: 4096
output_token_limit: 4096

config_schemas:
- config_id: temperature
Expand Down
14 changes: 7 additions & 7 deletions inference/providers/deepseek/chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ def _build_deepseek_chat_completion_payload(
if value is not None:
payload[key] = value

# if function_call:
# if function_call in ["none", "auto"]:
# payload["tool_choice"] = function_call
# else:
# payload["tool_choice"] = {"name": function_call}
# if functions:
# payload["tools"] = [{"type": "function", "function": f.model_dump()} for f in functions]
if function_call:
if function_call in ["none", "auto"]:
payload["tool_choice"] = function_call
else:
payload["tool_choice"] = {"name": function_call}
if functions:
payload["tools"] = [{"type": "function", "function": f.model_dump()} for f in functions]
logger.debug(f"_build_deepseek_chat_completion_payload: {payload}")
return payload

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description: "i18n:deepseek_chat_description"
default_endpoint_url: https://api.deepseek.com/chat/completions

properties:
function_call: false
function_call: true
streaming: true
input_token_limit: 128000
output_token_limit: 4000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description: "i18n:deepseek_coder_description"
default_endpoint_url: https://api.deepseek.com/chat/completions

properties:
function_call: false
function_call: true
streaming: true
input_token_limit: 128000
output_token_limit: 4000
Expand Down
1 change: 1 addition & 0 deletions inference/test/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ async def test_chat_completion_by_stream_and_function_call_and_length(self, test
or "openrouter" in model_schema_id
or "anthropic" in model_schema_id
or "fireworks" in model_schema_id
or "deepseek" in model_schema_id
):
pytest.skip("Skip the test case without function call or stream.")
functions = test_data["functions"]
Expand Down

0 comments on commit 4f1e820

Please sign in to comment.