Merge pull request BerriAI#4549 from BerriAI/litellm_add_groq_whisper
[Feat] Add Groq/whisper-large-v3
ishaan-jaff authored Jul 5, 2024
2 parents 1807c49 + 8206707 commit 340f731
Showing 4 changed files with 46 additions and 8 deletions.
17 changes: 17 additions & 0 deletions docs/my-website/docs/providers/groq.md
@@ -157,4 +157,21 @@ if tool_calls:
model="groq/llama2-70b-4096", messages=messages
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
```

## Speech to Text - Whisper

```python
os.environ["GROQ_API_KEY"] = ""
audio_file = open("/path/to/audio.mp3", "rb")

transcript = litellm.transcription(
model="groq/whisper-large-v3",
file=audio_file,
prompt="Specify context or spelling",
temperature=0,
response_format="json"
)

print("response=", transcript)
```
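
Groq transcription can also be called asynchronously. A minimal sketch, assuming `litellm.atranscription` mirrors the synchronous `litellm.transcription` signature shown above:

```python
import asyncio
import os

import litellm

os.environ["GROQ_API_KEY"] = ""


async def main():
    # assumes litellm.atranscription accepts the same arguments as litellm.transcription
    with open("/path/to/audio.mp3", "rb") as audio_file:
        transcript = await litellm.atranscription(
            model="groq/whisper-large-v3",
            file=audio_file,
        )
    print(transcript.text)


asyncio.run(main())
```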
4 changes: 3 additions & 1 deletion litellm/main.py
@@ -4367,6 +4367,8 @@ def transcription(

    model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore

    if dynamic_api_key is not None:
        # prefer the provider-resolved key (e.g. GROQ_API_KEY for groq/ models) returned by get_llm_provider
        api_key = dynamic_api_key
    optional_params = {
        "language": language,
        "prompt": prompt,
@@ -4408,7 +4410,7 @@ def transcription(
            azure_ad_token=azure_ad_token,
            max_retries=max_retries,
        )
    elif custom_llm_provider == "openai":
    elif custom_llm_provider == "openai" or custom_llm_provider == "groq":
        api_base = (
            api_base
            or litellm.api_base
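
For context on the `elif` change above: a `groq/` transcription request is served by the OpenAI-compatible handler, pointed at Groq's endpoint with the key resolved by `get_llm_provider`. A rough illustration of what the routed request amounts to (not the literal code path; the base URL is assumed from Groq's OpenAI-compatible API):

```python
import os

from openai import OpenAI

# Illustration only: the OpenAI SDK pointed at Groq's OpenAI-compatible endpoint.
client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],         # the dynamic_api_key resolved above
    base_url="https://api.groq.com/openai/v1",  # assumed Groq base URL
)

with open("gettysburg.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-large-v3",  # provider prefix stripped before the upstream call
        file=audio_file,
    )

print(transcript.text)
```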
Binary file added litellm/tests/gettysburg.wav
Binary file not shown.
33 changes: 26 additions & 7 deletions tests/test_whisper.py → litellm/tests/test_whisper.py
@@ -1,16 +1,22 @@
# What is this?
## Tests `litellm.transcription` endpoint. Outside litellm module b/c of audio file used in testing (it's ~700kb).

import pytest
import asyncio, time
import aiohttp, traceback
from openai import AsyncOpenAI
import sys, os, dotenv
import asyncio
import logging
import os
import sys
import time
import traceback
from typing import Optional

import aiohttp
import dotenv
import pytest
from dotenv import load_dotenv
from litellm.integrations.custom_logger import CustomLogger
from openai import AsyncOpenAI

import litellm
import logging
from litellm.integrations.custom_logger import CustomLogger

# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
@@ -41,6 +47,19 @@ def test_transcription():
# test_transcription()


def test_transcription_groq():
    litellm.set_verbose = True
    transcript = litellm.transcription(
        model="groq/whisper-large-v3",
        file=audio_file,
    )
    print(f"response=: {transcript.model_dump()}")
    print(f"hidden_params: {transcript._hidden_params}")


# test_transcription_groq()


def test_transcription_azure():
    litellm.set_verbose = True
    transcript = litellm.transcription(
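
Assuming a standard pytest layout, the new case can be exercised on its own with `pytest litellm/tests/test_whisper.py::test_transcription_groq -s` (a `GROQ_API_KEY` must be set in the environment).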
