LiteLLM Minor Fixes & Improvements (09/19/2024) (BerriAI#5793)
* fix(model_prices_and_context_window.json): add cost tracking for more vertex llama3.1 models

adds the 8b and 70b models

* fix(proxy/utils.py): handle data being None in pre-call hooks

* fix(proxy/): create views on initial proxy startup

fixes the base case, where a user starts the proxy for the first time

Fixes BerriAI#5756

* build(config.yml): fix vertex version for test

* feat(ui/): support enabling/disabling slack alerting

Allows an admin to turn Slack alerting on/off through the UI

* feat(rerank/main.py): support langfuse logging

* fix(proxy/utils.py): fix linting errors

* fix(langfuse.py): log clean metadata

* test(tests): replace deprecated openai model
krrishdholakia authored Sep 20, 2024
1 parent 696fc38 commit 3933fba
Showing 22 changed files with 644 additions and 93 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -558,6 +558,7 @@ jobs:
pip install "anyio==3.7.1"
pip install "asyncio==3.4.3"
pip install "PyGithub==1.59.1"
pip install "google-cloud-aiplatform==1.59.0"
- run:
name: Build Docker image
command: docker build -t my-app:latest -f Dockerfile.database .
25 changes: 14 additions & 11 deletions db_scripts/create_views.py
@@ -51,22 +51,25 @@ async def check_view_exists():

print("LiteLLM_VerificationTokenView Created!") # noqa

sql_query = """
CREATE MATERIALIZED VIEW IF NOT EXISTS "MonthlyGlobalSpend" AS
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
DATE_TRUNC('day', "startTime") AS date,
SUM("spend") AS spend
DATE("startTime") AS date,
SUM("spend") AS spend
FROM
"LiteLLM_SpendLogs"
"LiteLLM_SpendLogs"
WHERE
"startTime" >= CURRENT_DATE - INTERVAL '30 days'
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
GROUP BY
DATE_TRUNC('day', "startTime");
"""
# Execute the queries
await db.execute_raw(query=sql_query)
DATE("startTime");
"""
await db.execute_raw(query=sql_query)

print("MonthlyGlobalSpend Created!") # noqa
print("MonthlyGlobalSpend Created!") # noqa

try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
Expand Down
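The new bootstrap logic follows a probe-then-create pattern: cheaply query the view, and only run the DDL when the probe fails. A minimal sketch of the pattern, assuming `db` is a connected Prisma client exposing `query_raw`/`execute_raw`; the `ensure_view` helper itself is hypothetical, not part of the diff:

```python
# Hypothetical helper distilling the probe-then-create pattern used above.
async def ensure_view(db, view_name: str, create_sql: str) -> None:
    try:
        # SELECT 1 is a cheap existence probe; it raises if the view is missing.
        await db.query_raw(f'SELECT 1 FROM "{view_name}" LIMIT 1')
        print(f"{view_name} exists!")  # noqa
    except Exception:
        # Fresh database (first proxy startup): create the view now.
        await db.execute_raw(create_sql)
        print(f"{view_name} created!")  # noqa
```

This is what fixes the BerriAI#5756 base case: a first-time proxy startup no longer assumes the views already exist.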
57 changes: 40 additions & 17 deletions litellm/integrations/langfuse.py
@@ -10,6 +10,7 @@
 import litellm
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
+from litellm.secret_managers.main import str_to_bool


 class LangFuseLogger:
@@ -66,6 +67,11 @@ def __init__(
         project_id = None

         if os.getenv("UPSTREAM_LANGFUSE_SECRET_KEY") is not None:
+            upstream_langfuse_debug = (
+                str_to_bool(self.upstream_langfuse_debug)
+                if self.upstream_langfuse_debug is not None
+                else None
+            )
             self.upstream_langfuse_secret_key = os.getenv(
                 "UPSTREAM_LANGFUSE_SECRET_KEY"
             )
@@ -80,7 +86,11 @@ def __init__(
                 secret_key=self.upstream_langfuse_secret_key,
                 host=self.upstream_langfuse_host,
                 release=self.upstream_langfuse_release,
-                debug=self.upstream_langfuse_debug,
+                debug=(
+                    upstream_langfuse_debug
+                    if upstream_langfuse_debug is not None
+                    else False
+                ),
             )
         else:
             self.upstream_langfuse = None
@@ -175,6 +185,7 @@ def log_event(
             pass

         # end of processing langfuse ########################
+
         if (
             level == "ERROR"
             and status_message is not None
@@ -208,6 +219,11 @@
         ):
             input = prompt
             output = response_obj["text"]
+        elif response_obj is not None and isinstance(
+            response_obj, litellm.RerankResponse
+        ):
+            input = prompt
+            output = response_obj.results
         elif (
             kwargs.get("call_type") is not None
             and kwargs.get("call_type") == "pass_through_endpoint"
@@ -283,14 +299,14 @@ def _log_langfuse_v1(
         input,
         response_obj,
     ):
-        from langfuse.model import CreateGeneration, CreateTrace
+        from langfuse.model import CreateGeneration, CreateTrace  # type: ignore

         verbose_logger.warning(
             "Please upgrade langfuse to v2.0.0 or higher: https://github.com/langfuse/langfuse-python/releases/tag/v2.0.1"
         )

-        trace = self.Langfuse.trace(
-            CreateTrace(
+        trace = self.Langfuse.trace(  # type: ignore
+            CreateTrace(  # type: ignore
                 name=metadata.get("generation_name", "litellm-completion"),
                 input=input,
                 output=output,
@@ -336,6 +352,7 @@ def _log_langfuse_v2(
         try:
             tags = []
             try:
+                optional_params.pop("metadata")
                 metadata = copy.deepcopy(
                     metadata
                 )  # Avoid modifying the original metadata
@@ -361,7 +378,7 @@
                 langfuse.version.__version__
             ) >= Version("2.7.3")

-        print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ")
+        print_verbose("Langfuse Layer Logging - logging to langfuse v2 ")

         if supports_tags:
             metadata_tags = metadata.pop("tags", [])
@@ -519,25 +536,31 @@ def _log_langfuse_v2(
                 if key.lower() not in ["authorization", "cookie", "referer"]:
                     clean_headers[key] = value

-            clean_metadata["request"] = {
-                "method": method,
-                "url": url,
-                "headers": clean_headers,
-            }
+            # clean_metadata["request"] = {
+            #     "method": method,
+            #     "url": url,
+            #     "headers": clean_headers,
+            # }
             trace = self.Langfuse.trace(**trace_params)

             # Log provider specific information as a span
             log_provider_specific_information_as_span(trace, clean_metadata)

             generation_id = None
             usage = None
-            if response_obj is not None and response_obj.get("id", None) is not None:
-                generation_id = litellm.utils.get_logging_id(start_time, response_obj)
-                usage = {
-                    "prompt_tokens": response_obj.usage.prompt_tokens,
-                    "completion_tokens": response_obj.usage.completion_tokens,
-                    "total_cost": cost if supports_costs else None,
-                }
+            if response_obj is not None:
+                if response_obj.get("id", None) is not None:
+                    generation_id = litellm.utils.get_logging_id(
+                        start_time, response_obj
+                    )
+                _usage_obj = getattr(response_obj, "usage", None)
+
+                if _usage_obj:
+                    usage = {
+                        "prompt_tokens": _usage_obj.prompt_tokens,
+                        "completion_tokens": _usage_obj.completion_tokens,
+                        "total_cost": cost if supports_costs else None,
+                    }
             generation_name = clean_metadata.pop("generation_name", None)
             if generation_name is None:
                 # if `generation_name` is None, use sensible default values
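The usage handling above no longer assumes every response is chat-completion shaped. A minimal sketch of the guard, with `build_usage` as a hypothetical stand-alone version of the logic in the diff:

```python
# Hypothetical distillation of the guard above: read `usage` defensively so
# response types without it (e.g. rerank responses) don't raise AttributeError.
def build_usage(response_obj, cost=None, supports_costs=True):
    _usage_obj = getattr(response_obj, "usage", None)
    if not _usage_obj:
        return None  # e.g. litellm.RerankResponse carries no token usage
    return {
        "prompt_tokens": _usage_obj.prompt_tokens,
        "completion_tokens": _usage_obj.completion_tokens,
        "total_cost": cost if supports_costs else None,
    }
```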
7 changes: 4 additions & 3 deletions litellm/llms/cohere/rerank.py
@@ -66,9 +66,6 @@ def rerank(

         request_data_dict = request_data.dict(exclude_none=True)

-        if _is_async:
-            return self.async_rerank(request_data_dict=request_data_dict, api_key=api_key, api_base=api_base, headers=headers)  # type: ignore # Call async method
-
         ## LOGGING
         litellm_logging_obj.pre_call(
             input=request_data_dict,
@@ -79,6 +76,10 @@
"headers": headers,
},
)

if _is_async:
return self.async_rerank(request_data_dict=request_data_dict, api_key=api_key, api_base=api_base, headers=headers) # type: ignore # Call async method

client = _get_httpx_client()
response = client.post(
api_base,
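The change here is pure ordering: pre-call logging used to sit after the async early-return, so async rerank calls skipped it. A toy illustration of the fix; all names below are hypothetical stand-ins, not LiteLLM APIs:

```python
import asyncio

def log_pre_call(request: str) -> None:
    print(f"pre-call: {request}")  # shared logging hook

def sync_call(request: str) -> str:
    return f"sync result for {request}"

async def async_call(request: str) -> str:
    return f"async result for {request}"

def handle(request: str, is_async: bool):
    log_pre_call(request)  # now runs before the branch, so both paths log
    if is_async:
        return async_call(request)  # coroutine for the caller to await
    return sync_call(request)

print(handle("rerank docs", is_async=False))
print(asyncio.run(handle("rerank docs", is_async=True)))
```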
@@ -175,7 +175,7 @@ def completion(
                 client=client,
                 timeout=timeout,
                 encoding=encoding,
-                custom_llm_provider="vertex_ai_beta",
+                custom_llm_provider="vertex_ai",
             )

         except Exception as e:
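With this change, responses from Vertex partner models should carry `vertex_ai` rather than `vertex_ai_beta` as their provider label. A hedged spot-check, assuming Vertex AI credentials are configured and that `_hidden_params` exposes the provider on the response object as in current litellm builds:

```python
import litellm

# Model name taken from the pricing entries below; requires Vertex AI access.
response = litellm.completion(
    model="vertex_ai/meta/llama3-8b-instruct-maas",
    messages=[{"role": "user", "content": "hi"}],
)
# The provider label recorded on the response should now be "vertex_ai".
print(response._hidden_params.get("custom_llm_provider"))
```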
20 changes: 20 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
@@ -2350,6 +2350,26 @@
"mode": "chat",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
},
"vertex_ai/meta/llama3-70b-instruct-maas": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "vertex_ai-llama_models",
"mode": "chat",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
},
"vertex_ai/meta/llama3-8b-instruct-maas": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "vertex_ai-llama_models",
"mode": "chat",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
},
"vertex_ai/mistral-large@latest": {
"max_tokens": 8191,
"max_input_tokens": 128000,
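Once merged, these entries are queryable through `litellm.model_cost`, the same map the cost-tracking code reads. A small sanity check, assuming an installed litellm version that bundles this backup file:

```python
import litellm

info = litellm.model_cost["vertex_ai/meta/llama3-70b-instruct-maas"]
# Costs are 0.0 here because these Vertex MaaS models are not billed per token.
print(info["max_tokens"], info["input_cost_per_token"])  # 32000 0.0
```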
14 changes: 7 additions & 7 deletions litellm/proxy/_new_secret_config.yaml
@@ -19,11 +19,11 @@ model_list:
   - model_name: o1-preview
     litellm_params:
       model: o1-preview
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY

 litellm_settings:
   cache: true
-  # cache_params:
-  #   type: "redis"
-  #   service_name: "mymaster"
-  #   sentinel_nodes:
-  #     - ["localhost", 26379]
+  success_callback: ["langfuse"]
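With the rerank model registered and `success_callback: ["langfuse"]` enabled, the SDK-side equivalent looks roughly like this; it assumes COHERE_API_KEY plus Langfuse keys are set in the environment, and `top_n` follows Cohere's rerank parameters:

```python
import litellm

litellm.success_callback = ["langfuse"]  # mirrors the config above

response = litellm.rerank(
    model="cohere/rerank-english-v3.0",
    query="What is the capital of France?",
    documents=["Paris is the capital of France.", "Berlin is in Germany."],
    top_n=1,
)
print(response.results)  # logged to Langfuse as `output`, per the diff above
```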