
Commit

Update
ExplorerGT92 authored and ExplorerGT92 committed Jan 26, 2024
1 parent 3b4980c commit 6666e6b
Showing 5 changed files with 221 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .env.template
@@ -43,7 +43,7 @@ MAX_TOKENS=4095

##############################################################################################################

-ENABLE_ACCUWEATHERPLUGIN=False
+ENABLE_ACCUWEATHERPLUGIN=True

ENABLE_GEMINIPROPLUGIN=False

81 changes: 76 additions & 5 deletions app.py
@@ -40,13 +40,16 @@
from utils.openai_model_tools import (
ask_chat_gpt_4_0314_synchronous,
ask_chat_gpt_4_0314_asynchronous,
ask_chat_gpt_4_32k_0314_synchronous,
ask_chat_gpt_4_32k_0314_asynchronous,
ask_chat_gpt_4_0613_synchronous,
ask_chat_gpt_4_0613_asynchronous,
ask_gpt_4_vision,
)
from utils.openai_dalle_tools import generate_an_image_with_dalle3
from utils.core_tools import get_current_date_time, display_help
from output_methods.audio_pyttsx3 import tts_output

from plugins.plugins_enabled import enable_plugins

sys.path.append(str(Path(__file__).parent))
@@ -383,6 +386,8 @@ async def main():
"get_current_date_time": get_current_date_time,
"ask_chat_gpt_4_0314_synchronous": ask_chat_gpt_4_0314_synchronous,
"ask_chat_gpt_4_0314_asynchronous": ask_chat_gpt_4_0314_asynchronous,
"ask_chat_gpt_4_32k_0314_synchronous": ask_chat_gpt_4_32k_0314_synchronous,
"ask_chat_gpt_4_32k_0314_asynchronous": ask_chat_gpt_4_32k_0314_asynchronous,
"ask_chat_gpt_4_0613_synchronous": ask_chat_gpt_4_0613_synchronous,
"ask_chat_gpt_4_0613_asynchronous": ask_chat_gpt_4_0613_asynchronous,
"generate_an_image_with_dalle3": generate_an_image_with_dalle3,
@@ -402,7 +407,7 @@ async def main():
"type": "function",
"function": {
"name": "ask_chat_gpt_4_0314_synchronous",
"description": "This function allows you to ask a larger AI LLM for assistance synchronously, like asking a more experienced colleague for assistance. This LLMs maximum token output limit is 2048 and this model's maximum context length is 8192 tokens",
"description": "This function allows you to ask a larger AI LLM for assistance synchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
@@ -427,7 +432,57 @@ async def main():
"type": "function",
"function": {
"name": "ask_chat_gpt_4_0314_asynchronous",
"description": "This function allows you to ask a larger AI LLM for assistance asynchronously, like asking a more experienced colleague for assistance. This LLMs maximum token output limit is 2048 and this model's maximum context length is 8192 tokens",
"description": "This function allows you to ask a larger AI LLM for assistance asynchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
"temperature": {
"type": "integer",
"description": "The temperature associated with request: 0 for factual, 2 for creative.",
},
"question": {
"type": "string",
"description": "What are you, the ai assistant, requesting to be done with the text you are providing?",
},
"text": {
"type": "string",
"description": "The text to be analyzed",
},
},
"required": ["question", "text"],
},
},
},
{
"type": "function",
"function": {
"name": "ask_chat_gpt_4_32k_0314_synchronous",
"description": "This function allows you to ask a larger AI LLM for assistance synchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
"temperature": {
"type": "integer",
"description": "The temperature associated with request: 0 for factual, 2 for creative.",
},
"question": {
"type": "string",
"description": "What are you, the ai assistant, requesting to be done with the text you are providing?",
},
"text": {
"type": "string",
"description": "The text to be analyzed",
},
},
"required": ["question", "text"],
},
},
},
{
"type": "function",
"function": {
"name": "ask_chat_gpt_4_32k_0314_asynchronous",
"description": "This function allows you to ask a larger AI LLM for assistance asynchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
@@ -452,7 +507,7 @@ async def main():
"type": "function",
"function": {
"name": "ask_chat_gpt_4_0613_synchronous",
"description": "This function allows you to ask a larger AI LLM for assistance synchronously, like asking a more experienced colleague for assistance. This LLMs maximum token output limit is 2048 and this model's maximum context length is 8192 tokens",
"description": "This function allows you to ask a larger AI LLM for assistance synchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
@@ -468,6 +523,14 @@ async def main():
"type": "string",
"description": "The text to be analyzed",
},
"tools": {
"type": "string",
"description": "The tools to use for the request.",
},
"tool_choice": {
"type": "string",
"description": "The tool choice to use for the request.",
},
},
"required": ["question", "text"],
},
@@ -477,7 +540,7 @@ async def main():
"type": "function",
"function": {
"name": "ask_chat_gpt_4_0613_asynchronous",
"description": "This function allows you to ask a larger AI LLM for assistance asynchronously, like asking a more experienced colleague for assistance. This LLMs maximum token output limit is 2048 and this model's maximum context length is 8192 tokens",
"description": "This function allows you to ask a larger AI LLM for assistance asynchronously, like asking a more experienced colleague for assistance.",
"parameters": {
"type": "object",
"properties": {
@@ -493,6 +556,14 @@ async def main():
"type": "string",
"description": "The text to be analyzed",
},
"tools": {
"type": "string",
"description": "The tools to use for the request.",
},
"tool_choice": {
"type": "string",
"description": "The tool choice to use for the request.",
},
},
"required": ["question", "text"],
},
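
# For context: tool definitions like the ones above are passed to the chat completions
# endpoint together with the conversation, and the model decides whether to emit tool calls.
# A hedged sketch of that wiring with the openai 1.x client; the variable name `tools`, the
# model name, and the message content are placeholders rather than the app's actual values.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

response = client.chat.completions.create(
    model="gpt-4-1106-preview",  # placeholder model name
    messages=[{"role": "user", "content": "Summarize the attached text."}],
    tools=tools,         # the list of {"type": "function", ...} specs shown above
    tool_choice="auto",  # let the model decide whether to call a tool
)
tool_calls = response.choices[0].message.tool_calls or []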
@@ -622,7 +693,7 @@ async def main():
if use_tts:
# Use TTS to output the final response
console.print("\n" + final_text, style="green")
-tts_output(final_text)
+tts_output(final_text) # Call the tts_output function directly
else:
# Print the final response to the console
console.print("\n" + final_text, style="green")
78 changes: 58 additions & 20 deletions output_methods/audio_pyttsx3.py
@@ -2,16 +2,12 @@
# !/usr/bin/env python
# coding: utf-8
# Filename: audio_pyttsx3.py
-# File Path: output\audio_pyttsx3.py
-# Last modified by: ExplorerGT92
-# Last modified on: 2023/12/17
-# branch: voice_rec_and_tts
+# File Path: output/audio_pyttsx3.py

"""
This module is responsible for handling audio output.
"""

import os
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
from typing import Union
@@ -20,6 +16,9 @@
import pygame
from dotenv import load_dotenv
from config import TTS_ENGINE, TTS_VOICE_ID, TTS_RATE, ELEVENLABS_VOICE
import websockets
import base64
import asyncio
import json  # assumed addition: json.dumps/json.loads are used by the streaming helpers below

# Import ElevenLabs functions
from elevenlabs import generate, play, set_api_key, get_api_key, stream
@@ -31,8 +30,8 @@
ELEVEN_API_KEY = os.getenv('ELEVEN_API_KEY')
if ELEVEN_API_KEY:
set_api_key(ELEVEN_API_KEY)
TTS_ENGINE = os.getenv('TTS_ENGINE')

# Other functions remain unchanged...

def tts_output(text):
"""
@@ -41,33 +40,72 @@ def tts_output(text):
Args:
text (str): The text to output.
"""

if TTS_ENGINE == "pyttsx3":
tts_output_pyttsx3(text)
elif TTS_ENGINE == "elevenlabs" and ELEVEN_API_KEY:
# tts_output_elevenlabs (defined below) is a regular function that drives the ElevenLabs
# websocket stream with its own asyncio.run call, so it is invoked directly here
tts_output_elevenlabs(text)
else:
raise ValueError(f"Invalid TTS_ENGINE value or missing ElevenLabs API key: {TTS_ENGINE}")


async def stream_elevenlabs(audio_stream):
"""Stream audio data using pygame player."""
initialize_audio()
async for chunk in audio_stream:
if chunk:
play_audio(chunk)


async def text_to_speech_input_streaming(voice_id, text_iterator):
"""Send text to ElevenLabs API and stream the returned audio."""
uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream-input?model_id=eleven_monolingual_v1"

async with websockets.connect(uri) as websocket:
await websocket.send(json.dumps({
"text": " ",
"voice_settings": {"stability": 0.5, "similarity_boost": 0.8},
"xi_api_key": ELEVEN_API_KEY,
}))

async def listen():
"""Listen to the websocket for audio data and stream it."""
while True:
try:
message = await websocket.recv()
data = json.loads(message)
if data.get("audio"):
yield base64.b64decode(data["audio"])
elif data.get('isFinal'):
break
except websockets.exceptions.ConnectionClosed as e:
print(f"Connection closed with error: {e}")
break
except websockets.exceptions.ConnectionClosedOK:
print("Connection closed without error.")
break

listen_task = asyncio.create_task(stream_elevenlabs(listen()))

async for text in text_iterator:
await websocket.send(json.dumps({"text": text, "try_trigger_generation": True}))

await websocket.send(json.dumps({"text": ""}))

await listen_task
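
# For context: the input-streaming endpoint is mainly useful when text arrives incrementally
# (for example, while a model is still generating); a single complete string, as in
# tts_output_elevenlabs below, is just the simplest case. A hedged usage sketch with an
# assumed incremental source; the chunk values are placeholders.
async def demo_text_chunks():
    """Yield text fragments as they become available (placeholder data)."""
    for chunk in ["Hello ", "from the ", "streaming TTS pipeline."]:
        yield chunk
        await asyncio.sleep(0.1)  # simulate tokens arriving over time


# ELEVENLABS_VOICE is the voice id imported from config at the top of this module.
# asyncio.run(text_to_speech_input_streaming(ELEVENLABS_VOICE, demo_text_chunks()))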


def tts_output_elevenlabs(text):
"""
-This function outputs the given text as speech using ElevenLabs API.
+This function outputs the given text as speech using ElevenLabs API with streaming.
Args:
text (str): The text to output.
"""
-# Generate audio using ElevenLabs API
-audio_bytes = generate(
-text=text,
-voice=ELEVENLABS_VOICE, # Replace with the desired voice
-model="eleven_multilingual_v2",
-stream=False, # Set to True if you want to stream the audio
-output_format="mp3_44100_128"
-)
-
-# Play the generated audio
-play(audio=audio_bytes)
+async def text_iterator():
+yield text
+
+asyncio.run(text_to_speech_input_streaming(ELEVENLABS_VOICE, text_iterator()))


def initialize_audio():
2 changes: 1 addition & 1 deletion requirements.txt
@@ -8,7 +8,7 @@ google-auth-oauthlib==1.2.0
google-api-python-client==2.111.0
google-cloud-aiplatform==1.38.1
google-generativeai==0.3.1
-openai==1.7.0
+openai==1.10.0
pygame==2.5.2
pyttsx3==2.90
python-dotenv==1.0.0
85 changes: 85 additions & 0 deletions utils/openai_model_tools.py
@@ -214,6 +214,91 @@ def encode_image(image_path):
encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
return f"data:{mime_type};base64,{encoded_string}"


def ask_chat_gpt_4_32k_0314_synchronous(**kwargs) -> str:
"""
Ask ChatGPT a question and return the response.
Args:
kwargs (dict): The keyword arguments to pass to the function.
Returns:
str: The response from ChatGPT.
"""

question = kwargs.get("question", "")
text = kwargs.get("text", "")

messages = [
{
"role": "system",
"content": "You are a specialized AI language model designed to act as an expert tool within a larger conversational system. Your role is to provide detailed and expert-level responses to queries directed to you by the controller AI. You should focus on delivering precise information and insights based on your specialized knowledge and capabilities. Your responses should be concise, relevant, and strictly within the scope of the expertise you represent. You are not responsible for maintaining the overall conversation with the end user, but rather for supporting the controller AI by processing and responding to specific requests for information or analysis. Adhere to the constraints provided by the controller, such as token limits and context relevance, and ensure that your contributions are well-reasoned and can be seamlessly integrated into the broader conversation managed by the controller AI.",
},
{"role": "user", "content": question},
{"role": "assistant", "content": text},
]

response = gpt4_client.chat.completions.create(
model="gpt-4-32k-0314",
messages=messages,
temperature=0,
max_tokens=2048,
top_p=0.3,
frequency_penalty=0,
presence_penalty=0,
)

if (
response.choices
and response.choices[0].message
and response.choices[0].message.content
):
return response.choices[0].message.content
else:
return "An error occurred or no content was returned."


async def ask_chat_gpt_4_32k_0314_asynchronous(**kwargs) -> str:
"""
Ask ChatGPT a question and return the response.
Args:
kwargs (dict): The keyword arguments to pass to the function.
Returns:
str: The response from ChatGPT.
"""

question = kwargs.get("question", "")
text = kwargs.get("text", "")

messages = [
{
"role": "system",
"content": "You are a specialized AI language model designed to act as an expert tool within a larger conversational system. Your role is to provide detailed and expert-level responses to queries directed to you by the controller AI. You should focus on delivering precise information and insights based on your specialized knowledge and capabilities. Your responses should be concise, relevant, and strictly within the scope of the expertise you represent. You are not responsible for maintaining the overall conversation with the end user, but rather for supporting the controller AI by processing and responding to specific requests for information or analysis. Adhere to the constraints provided by the controller, such as token limits and context relevance, and ensure that your contributions are well-reasoned and can be seamlessly integrated into the broader conversation managed by the controller AI.",
},
{"role": "user", "content": question},
{"role": "assistant", "content": text},
]

response = await gpt4_client_async.chat.completions.create(
model="gpt-4-32k-0314",
messages=messages,
temperature=0.2,
max_tokens=2048,
top_p=0.5,
frequency_penalty=0,
presence_penalty=0,
)

if (
response.choices
and response.choices[0].message
and response.choices[0].message.content
):
return response.choices[0].message.content
else:
return "An error occurred or no content was returned."


# Function to send the image to the vision model
async def ask_gpt_4_vision(image_name, drive_service=None):
# Check if the image exists in the local uploads folder
