Add TTS voice selection and ElevenLabs integration

rayisaninja · Jan 4, 2024 · 9e51661 · 9e51661
1 parent d6a2862
commit 9e51661
Show file tree

Hide file tree

Showing 5 changed files with 86 additions and 372 deletions.
diff --git a/.env.template b/.env.template
@@ -3,7 +3,8 @@
 # This template file contains environment variables required for the application.
 # Copy this file to '.env' and fill in the necessary API keys and settings.
 # Please refer to the respective API documentation for more details on obtaining API keys.
-MAIN_SYSTEM_PROMPT=You are an AI Assistant
+
+MAIN_SYSTEM_PROMPT=You are an AI Assistant integrated within a Python-based application designed to assist users by leveraging a suite of tools and functions, both synchronous and asynchronous, to process user requests and manage dynamic workflows. Your capabilities include interacting with a larger AI language model (LLM) for synchronous and asynchronous assistance, accessing the current date and time, and utilizing enabled plugins for additional functionalities. You are expected to maintain a conversation memory, ensuring the context remains within the token limit for efficient processing. When responding to user requests, consider the available tools and their descriptions, dynamically structuring workflows to include multiple turns where necessary. Prioritize reasoning and delivering the best possible response based on the users original request, taking into account the data gathered and actions completed during the interaction. Ensure that your responses are clear, concise, and directly address the users needs, while also being prepared to handle errors or unexpected situations gracefully.
 
 LOGGING_ENABLED=True
 LOGGING_LEVEL=debug
@@ -42,27 +43,45 @@ MAX_TOKENS=4095
 
 ##############################################################################################################
 
-ENABLE_ACCUWEATHER_BASE=False
 ENABLE_ACCUWEATHERPLUGIN=False
 
-ENABLE_GEMINI_PRO_BASE=False
 ENABLE_GEMINIPROPLUGIN=False
 
-ENABLE_GMAIL_BASE=False
 ENABLE_GMAILPLUGIN=False
 
-ENABLE_GOOGLE_SEARCH_BASE=False
 ENABLE_GOOGLESEARCHPLUGIN=False
 
-ENABLE_NEWS_BASE=False
 ENABLE_NEWSPLUGIN=False
 
-ENABLE_NHTSA_VPIC_BASE=False
 ENABLE_NHTSAVPICPLUGIN=False
 
-ENABLE_SYSTEM_COMMANDS_BASE=False
 ENABLE_SYSTEMCOMMANDSPLUGIN=False
 
+##############################################################################################################
+# TTS SETTINGS
+### ELEVENLABS API
+## Eleven Labs Default Voice IDs
+## Rachel : 21m00Tcm4TlvDq8ikWAM
+## Domi : AZnzlk1XvdvUeBnXmlld
+## Bella : EXAVITQu4vr4xnSDxMaL
+## Antoni : ErXwobaYiN019PkySvjV
+## Elli : MF3mGyEYCl7XYWbV9V6O
+## Josh : TxGEqnHWrfWFTfGW9XjX
+## Arnold : VR6AewLTigWG4xSOukaG
+## Adam : pNInz6obpgDQGcFmaJgB
+## Sam : yoZ06aMxZJJ28mfd3POQ
+##############################################################################################################
+
+# TTS engine to use: elevenlabs or pyttsx3 (elevenlabs requires ELEVEN_API_KEY to be set)
+TTS_ENGINE=pyttsx3
+
+ELEVEN_API_KEY=
+ELEVENLABS_VOICE=
+
+# pyttsx3 voice to use (Windows 11: David or Zira)
+TTS_VOICE_ID=Microsoft Zira Desktop - English (United States)
+TTS_RATE=169
+
 ##############################################################################################################
 # GEMINI PRO SETTINGS
 # Obtain your API key from: https://makersuite.google.com/app/apikey
@@ -93,10 +112,8 @@ GMAIL_ADDRESS=
 GOOGLE_CLIENT_ID=
 GOOGLE_CLIENT_SECRET=
 
-GOOGLE_API_KEY=
-
 # Your Google Custom Search Engine ID (required if tools are enabled)
-
+GOOGLE_API_KEY=
 GOOGLE_CSE_ID=
 
 #########################################################################################

diff --git a/config.py b/config.py
@@ -69,3 +69,5 @@
 
 # Configures the main app to use the local system TTS rate.
 TTS_RATE = int(os.getenv("TTS_RATE", str(150)))
+
+ELEVENLABS_VOICE = os.getenv("ELEVENLABS_VOICE", "Rachel")
diff --git a/output_methods/audio_pyttsx3.py b/output_methods/audio_pyttsx3.py
@@ -2,34 +2,77 @@
 # !/usr/bin/env python
 # coding: utf-8
 # Filename: audio_pyttsx3.py
-# File Path: output/audio_pyttsx3.py
+# File Path: output_methods/audio_pyttsx3.py
+# Last modified by: ExplorerGT92
+# Last modified on: 2023/12/17
+# branch: voice_rec_and_tts
 
 """
-This module is responsible for handling TTS audio output.
-
-It uses pyttsx3 as the TTS engine.
+This module is responsible for handling audio output.
 
 """
 
 import os
-import logging
+os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
 from typing import Union
 from io import BytesIO
 import pyttsx3
-os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
 import pygame
-from config import TTS_ENGINE, TTS_VOICE_ID, TTS_RATE
+from dotenv import load_dotenv
+from config import TTS_ENGINE, TTS_VOICE_ID, TTS_RATE, ELEVENLABS_VOICE
+
+# Import ElevenLabs functions
+from elevenlabs import generate, play, set_api_key, get_api_key, stream
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Set the ElevenLabs API key if it exists in the environment
+ELEVEN_API_KEY = os.getenv('ELEVEN_API_KEY')
+if ELEVEN_API_KEY:
+    set_api_key(ELEVEN_API_KEY)
+
+# TTS dispatch and engine-specific output functions
+
+def tts_output(text):
+    """
+    This function outputs the given text as speech.
+
+    Args:
+        text (str): The text to output.
+    """
+
+    if TTS_ENGINE == "pyttsx3":
+        tts_output_pyttsx3(text)
+    elif TTS_ENGINE == "elevenlabs" and ELEVEN_API_KEY:
+        tts_output_elevenlabs(text)
+    else:
+        raise ValueError(f"Invalid TTS_ENGINE value or missing ElevenLabs API key: {TTS_ENGINE}")
+
+
+def tts_output_elevenlabs(text):
+    """
+    This function outputs the given text as speech using the ElevenLabs API.
+
+    The voice is taken from ELEVENLABS_VOICE (imported from config); the
+    model and output format are fixed in the call below.
+
+    Args:
+        text (str): The text to output.
+    """
+    # Generate audio using ElevenLabs API
+    audio_bytes = generate(
+        text=text,
+        voice=ELEVENLABS_VOICE,  # Voice configured via ELEVENLABS_VOICE in config
+        model="eleven_multilingual_v2",
+        stream=False,  # Set to True if you want to stream the audio
+        output_format="mp3_44100_128"
+    )
 
-comtypes_logger = logging.getLogger('comtypes')
-# Set the logging level to WARNING to ignore DEBUG messages
-comtypes_logger.setLevel(logging.WARNING)
+    # Play the generated audio
+    play(audio=audio_bytes)
 
 
 def initialize_audio():
     """
     This function initializes the audio system.
-
-    # TODO: Add support for other TTS engines.
     """
     pygame.mixer.pre_init(44100, -16, 2, 4096)
     pygame.mixer.init()
@@ -41,8 +84,6 @@ def play_audio(audio: Union[bytes, BytesIO]):
 
     Args:
         audio (bytes or BytesIO): The audio to play.
-
-    # TODO: Add support for other TTS engines.
     """
 
     if not isinstance(audio, (bytes, BytesIO)):
@@ -56,31 +97,13 @@ def play_audio(audio: Union[bytes, BytesIO]):
         pygame.time.wait(10)
 
 
-def tts_output(text):
-    """
-    This function outputs the given text as speech.
-
-    Args:
-        text (str): The text to output.
-
-    # TODO: Add support for other TTS engines.
-    """
-
-    if TTS_ENGINE == "pyttsx3":
-        tts_output_pyttsx3(text)
-    else:
-        raise ValueError(f"Invalid TTS_ENGINE value: {TTS_ENGINE}")
-
-
 def tts_output_pyttsx3(text):
 
     """
     This function outputs the given text as speech using pyttsx3.
 
     Args:
         text (str): The text to output.
-    
-    # TODO: Add support for other TTS engines.
     """
 
     engine = pyttsx3.init('sapi5')
Original file line number	Diff line number	Diff line change
Expand Up		@@ -69,3 +69,5 @@

		# Configures the main app to use the local system TTS rate.
		TTS_RATE = int(os.getenv("TTS_RATE", str(150)))

		ELEVENLABS_VOICE = os.getenv("ELEVENLABS_VOICE", "Rachel")