Skip to content

Commit

Permalink
Add TTS voice selection and ElevenLabs integration
Browse files Browse the repository at this point in the history
  • Loading branch information
ExplorerGT92 committed Jan 4, 2024
1 parent d6a2862 commit 9e51661
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 372 deletions.
39 changes: 28 additions & 11 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# This template file contains environment variables required for the application.
# Copy this file to '.env' and fill in the necessary API keys and settings.
# Please refer to the respective API documentation for more details on obtaining API keys.
MAIN_SYSTEM_PROMPT=You are an AI Assistant

MAIN_SYSTEM_PROMPT=You are an AI Assistant integrated within a Python-based application designed to assist users by leveraging a suite of tools and functions, both synchronous and asynchronous, to process user requests and manage dynamic workflows. Your capabilities include interacting with a larger AI language model (LLM) for synchronous and asynchronous assistance, accessing the current date and time, and utilizing enabled plugins for additional functionalities. You are expected to maintain a conversation memory, ensuring the context remains within the token limit for efficient processing. When responding to user requests, consider the available tools and their descriptions, dynamically structuring workflows to include multiple turns where necessary. Prioritize reasoning and delivering the best possible response based on the users original request, taking into account the data gathered and actions completed during the interaction. Ensure that your responses are clear, concise, and directly address the users needs, while also being prepared to handle errors or unexpected situations gracefully.

LOGGING_ENABLED=True
LOGGING_LEVEL=debug
Expand Down Expand Up @@ -42,27 +43,45 @@ MAX_TOKENS=4095

##############################################################################################################

ENABLE_ACCUWEATHER_BASE=False
ENABLE_ACCUWEATHERPLUGIN=False

ENABLE_GEMINI_PRO_BASE=False
ENABLE_GEMINIPROPLUGIN=False

ENABLE_GMAIL_BASE=False
ENABLE_GMAILPLUGIN=False

ENABLE_GOOGLE_SEARCH_BASE=False
ENABLE_GOOGLESEARCHPLUGIN=False

ENABLE_NEWS_BASE=False
ENABLE_NEWSPLUGIN=False

ENABLE_NHTSA_VPIC_BASE=False
ENABLE_NHTSAVPICPLUGIN=False

ENABLE_SYSTEM_COMMANDS_BASE=False
ENABLE_SYSTEMCOMMANDSPLUGIN=False

##############################################################################################################
# TTS SETTINGS
### ELEVENLABS API
## Eleven Labs Default Voice IDs
## Rachel : 21m00Tcm4TlvDq8ikWAM
## Domi : AZnzlk1XvdvUeBnXmlld
## Bella : EXAVITQu4vr4xnSDxMaL
## Antoni : ErXwobaYiN019PkySvjV
## Elli : MF3mGyEYCl7XYWbV9V6O
## Josh : TxGEqnHWrfWFTfGW9XjX
## Arnold : VR6AewLTigWG4xSOukaG
## Adam : pNInz6obpgDQGcFmaJgB
## Sam : yoZ06aMxZJJ28mfd3POQ
##############################################################################################################

# Select the TTS engine: elevenlabs or pyttsx3
TTS_ENGINE=pyttsx3

ELEVEN_API_KEY=
ELEVENLABS_VOICE=

# pyttsx3 voice (Windows 11 provides David or Zira)
TTS_VOICE_ID=Microsoft Zira Desktop - English (United States)
TTS_RATE=169

##############################################################################################################
# GEMINI PRO SETTINGS
# Obtain your API key from: https://makersuite.google.com/app/apikey
Expand Down Expand Up @@ -93,10 +112,8 @@ GMAIL_ADDRESS=
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=

GOOGLE_API_KEY=

# Your Google API key and Custom Search Engine ID (required if tools are enabled)

GOOGLE_API_KEY=
GOOGLE_CSE_ID=

#########################################################################################
Expand Down
2 changes: 2 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,5 @@

# Configures the main app to use the local system TTS rate.
# `or` (instead of a getenv default) also covers a variable that is present
# but empty, e.g. `TTS_RATE=` in .env, which would otherwise crash int("").
TTS_RATE = int(os.getenv("TTS_RATE") or 150)

# ElevenLabs voice to use. The .env template ships `ELEVENLABS_VOICE=` (empty),
# and os.getenv() returns "" rather than its default for a set-but-empty
# variable, so fall back to "Rachel" explicitly.
ELEVENLABS_VOICE = os.getenv("ELEVENLABS_VOICE") or "Rachel"
87 changes: 55 additions & 32 deletions output_methods/audio_pyttsx3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,77 @@
# !/usr/bin/env python
# coding: utf-8
# Filename: audio_pyttsx3.py
# File Path: output/audio_pyttsx3.py
# File Path: output\audio_pyttsx3.py
# Last modified by: ExplorerGT92
# Last modified on: 2023/12/17
# branch: voice_rec_and_tts

"""
This module is responsible for handling TTS audio output.
It uses pyttsx3 as the TTS engine.
This module is responsible for handling audio output.
"""

import os
import logging
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
from typing import Union
from io import BytesIO
import pyttsx3
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
import pygame
from config import TTS_ENGINE, TTS_VOICE_ID, TTS_RATE
from dotenv import load_dotenv
from config import TTS_ENGINE, TTS_VOICE_ID, TTS_RATE, ELEVENLABS_VOICE

# Import ElevenLabs functions
from elevenlabs import generate, play, set_api_key, get_api_key, stream

# Load environment variables from the .env file before reading them with os.getenv()
load_dotenv()

# Read the ElevenLabs API key from the environment and register it with the
# elevenlabs client; set_api_key() is skipped when the variable is unset or empty
ELEVEN_API_KEY = os.getenv('ELEVEN_API_KEY')
if ELEVEN_API_KEY:
set_api_key(ELEVEN_API_KEY)

# TTS entry point and engine-specific output functions follow.

def tts_output(text):
    """
    Output the given text as speech using the configured TTS engine.

    Dispatches on the module-level TTS_ENGINE setting: "pyttsx3" uses
    tts_output_pyttsx3(), "elevenlabs" uses tts_output_elevenlabs() and
    additionally requires ELEVEN_API_KEY to be set.

    Args:
        text (str): The text to output.

    Raises:
        ValueError: If TTS_ENGINE is not a supported engine, or if it is
            "elevenlabs" but no ElevenLabs API key is configured.
    """
    if TTS_ENGINE == "pyttsx3":
        tts_output_pyttsx3(text)
    elif TTS_ENGINE == "elevenlabs":
        # Report a missing key separately from an unknown engine so the
        # user knows which setting to fix.
        if not ELEVEN_API_KEY:
            raise ValueError(
                "TTS_ENGINE is 'elevenlabs' but ELEVEN_API_KEY is not set"
            )
        tts_output_elevenlabs(text)
    else:
        raise ValueError(f"Invalid TTS_ENGINE value: {TTS_ENGINE}")


def tts_output_elevenlabs(text):
"""
Speak the given text using the ElevenLabs API.

Calls generate() with the configured ELEVENLABS_VOICE and hands the
result to play().

Args:
text (str): The text to output.
"""
# Generate audio using ElevenLabs API
audio_bytes = generate(
text=text,
voice=ELEVENLABS_VOICE, # Voice comes from the ELEVENLABS_VOICE config setting
model="eleven_multilingual_v2",
stream=False, # Non-streaming request; the result is passed to play() below
output_format="mp3_44100_128"
)

# NOTE(review): the comtypes logger lines below look like module-level code
# from the previous revision interleaved by the diff view - confirm whether
# they belong inside this function.
comtypes_logger = logging.getLogger('comtypes')
# Set the logging level to WARNING to ignore DEBUG messages
comtypes_logger.setLevel(logging.WARNING)
# Play the generated audio
play(audio=audio_bytes)


def initialize_audio():
"""
Initialize the pygame mixer.

Presets the mixer parameters with pygame.mixer.pre_init() and then
starts the mixer with pygame.mixer.init().
"""
# Positional arguments per the pygame docs: frequency (44100 Hz),
# size (-16, i.e. signed 16-bit samples), channels (2), buffer (4096).
pygame.mixer.pre_init(44100, -16, 2, 4096)
pygame.mixer.init()
Expand All @@ -41,8 +84,6 @@ def play_audio(audio: Union[bytes, BytesIO]):
Args:
audio (bytes or BytesIO): The audio to play.
# TODO: Add support for other TTS engines.
"""

if not isinstance(audio, (bytes, BytesIO)):
Expand All @@ -56,31 +97,13 @@ def play_audio(audio: Union[bytes, BytesIO]):
pygame.time.wait(10)


def tts_output(text):
    """
    Speak the given text through the configured TTS engine.

    Args:
        text (str): The text to output.

    Raises:
        ValueError: If TTS_ENGINE is anything other than "pyttsx3".
    """
    # Guard clause: reject any engine other than pyttsx3 up front.
    if TTS_ENGINE != "pyttsx3":
        raise ValueError(f"Invalid TTS_ENGINE value: {TTS_ENGINE}")

    tts_output_pyttsx3(text)


def tts_output_pyttsx3(text):

"""
This function outputs the given text as speech using pyttsx3.
Args:
text (str): The text to output.
# TODO: Add support for other TTS engines.
"""

engine = pyttsx3.init('sapi5')
Expand Down
Loading

0 comments on commit 9e51661

Please sign in to comment.