Skip to content

Commit

Permalink
新增openai tts接入
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Nov 9, 2023
1 parent acd96fd commit 525e703
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 3 deletions.
9 changes: 8 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
"history_enable": true,
"history_max_len": 500
},
"audio_synthesis_type": "genshinvoice_top",
"audio_synthesis_type": "edge-tts",
"audio_random_speed": {
"normal": {
"enable": false,
Expand Down Expand Up @@ -269,6 +269,13 @@
"voice_preset": "ikaros",
"voice_preset_file_path": "D:\\GitHub_pro\\AI-Vtuber\\tests\\test_VALL-E-X\\ikaros.npz"
},
"openai_tts": {
"type": "api",
"api_ip_port": "https://ysharma-openai-tts-new.hf.space/--replicas/zcq5n/",
"model": "tts-1",
"voice": "nova",
"api_key": "你的openai api key"
},
"chatterbot": {
"name": "bot",
"db_path": "db.sqlite3"
Expand Down
9 changes: 8 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
"history_enable": true,
"history_max_len": 500
},
"audio_synthesis_type": "genshinvoice_top",
"audio_synthesis_type": "edge-tts",
"audio_random_speed": {
"normal": {
"enable": false,
Expand Down Expand Up @@ -269,6 +269,13 @@
"voice_preset": "ikaros",
"voice_preset_file_path": "D:\\GitHub_pro\\AI-Vtuber\\tests\\test_VALL-E-X\\ikaros.npz"
},
"openai_tts": {
"type": "api",
"api_ip_port": "https://ysharma-openai-tts-new.hf.space/--replicas/zcq5n/",
"model": "tts-1",
"voice": "nova",
"api_key": "你的openai api key"
},
"chatterbot": {
"name": "bot",
"db_path": "db.sqlite3"
Expand Down
2 changes: 1 addition & 1 deletion requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ multidict==6.0.4
murmurhash==1.0.9
mypy-extensions==1.0.0
numexpr==2.8.4
openai==0.27.2
openai==1.2.0
openapi-schema-pydantic==1.2.4
packaging==23.1
parso==0.8.3
Expand Down
11 changes: 11 additions & 0 deletions tests/test_openai_tts/hf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from gradio_client import Client

# Manual smoke test: send one synthesis request to the Gradio-hosted OpenAI TTS
# demo and print whatever the endpoint returns.

# Positional inputs for the "/tts_enter_key" endpoint, in the order it expects them.
tts_inputs = (
    "你好",   # 'Input text' Textbox: the text to synthesize
    "tts-1",  # 'Model' Dropdown: tts-1 or tts-1-hd
    "nova",   # 'Voice Options' Dropdown: alloy, echo, fable, onyx, nova, shimmer
    "sk-",    # 'OpenAI API Key' Textbox: placeholder, replace with a real key
)

space = Client("https://ysharma-openai-tts-new.hf.space/--replicas/zcq5n/")
output_path = space.predict(*tts_inputs, api_name="/tts_enter_key")
print(f"音频合成成功,输出到={output_path}")
45 changes: 45 additions & 0 deletions utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,28 @@ async def voice_change_and_put_to_queue(message, voice_tmp_path):
except Exception as e:
logging.error(traceback.format_exc())
return
elif message["tts_type"] == "openai_tts":
try:
data = {
"type": message["data"]["type"],
"api_ip_port": message["data"]["api_ip_port"],
"model": message["data"]["model"],
"voice": message["data"]["voice"],
"api_key": message["data"]["api_key"],
"content": message["content"]
}

# 调用接口合成语音
voice_tmp_path = self.my_tts.openai_tts_api(data)
logging.info(f"openai_tts合成成功,合成内容:【{message['content']}】,输出到={voice_tmp_path}")

if voice_tmp_path is None:
return

await voice_change_and_put_to_queue(message, voice_tmp_path)
except Exception as e:
logging.error(traceback.format_exc())
return


# 音频变速
Expand Down Expand Up @@ -950,6 +972,7 @@ async def copywriting_synthesis_audio(self, file_path, out_audio_path="out/"):
edge_tts_config = self.config.get("edge-tts")
bark_gui = self.config.get("bark_gui")
vall_e_x = self.config.get("vall_e_x")
openai_tts = self.config.get("openai_tts")
genshinvoice_top = self.config.get("genshinvoice_top")
file_path = os.path.join(file_path)

Expand Down Expand Up @@ -1163,6 +1186,28 @@ async def voice_change_and_put_to_queue(voice_tmp_path):
voice_tmp_path = await self.my_tts.genshinvoice_top_api(content)
logging.info(f"genshinvoice_top合成成功,合成内容:【{content}】,输出到={voice_tmp_path}")

if voice_tmp_path is None:
return

await voice_change_and_put_to_queue(voice_tmp_path)
except Exception as e:
logging.error(traceback.format_exc())
return
elif audio_synthesis_type == "openai_tts":
try:
data = {
"type": openai_tts["type"],
"api_ip_port": openai_tts["api_ip_port"],
"model": openai_tts["model"],
"voice": openai_tts["voice"],
"api_key": openai_tts["api_key"],
"content": content
}

# 调用接口合成语音
voice_tmp_path = self.my_tts.openai_tts_api(data)
logging.info(f"openai_tts合成成功,合成内容:【{content}】,输出到={voice_tmp_path}")

if voice_tmp_path is None:
return

Expand Down
38 changes: 38 additions & 0 deletions utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,41 @@ async def genshinvoice_top_api(self, text):
logging.error(f'genshinvoice.top未知错误: {e}')

return None


# 请求OpenAI_TTS的api
def openai_tts_api(self, data):
    """Synthesize speech through OpenAI TTS and return the audio file path.

    Args:
        data: dict with the keys
            "type"        - "huggingface" (go through a Gradio app) or
                            "api" (call the OpenAI SDK directly);
            "api_ip_port" - address of the Gradio app, read only when
                            type is "huggingface";
            "model"       - TTS model name passed through to the backend;
            "voice"       - voice name passed through to the backend;
            "api_key"     - OpenAI API key;
            "content"     - text to synthesize.

    Returns:
        The path of the produced audio file, or None when the type is not
        supported or any step raised (the failure is logged, not re-raised).
    """
    try:
        tts_type = data["type"]

        if tts_type == "huggingface":
            # NOTE(review): Client is presumably gradio_client.Client imported
            # at the top of this module - confirm.
            client = Client(data["api_ip_port"])
            result = client.predict(
                data["content"],  # str in 'Text' Textbox component
                data["model"],    # Literal[tts-1, tts-1-hd] in 'Model' Dropdown component
                data["voice"],    # Literal[alloy, echo, fable, onyx, nova, shimmer] in 'Voice Options' Dropdown component
                data["api_key"],  # str in 'OpenAI API Key' Textbox component
                api_name="/tts_enter_key"
            )

            # Read the timestamp once so both name arguments are guaranteed to
            # agree; two separate calls could straddle a clock tick.
            file_stem = 'openai_tts_' + self.common.get_bj_time(4)
            new_file_path = self.common.move_file(
                result,
                os.path.join(self.audio_out_path, file_stem),
                file_stem,
                "mp3"
            )

            return new_file_path

        if tts_type == "api":
            # Imported lazily so the module still loads when only the
            # huggingface route is used.
            from openai import OpenAI

            client = OpenAI(api_key=data["api_key"])

            response = client.audio.speech.create(
                model=data["model"],
                voice=data["voice"],
                input=data["content"]
            )

            file_name = 'openai_tts_' + self.common.get_bj_time(4) + '.mp3'
            voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name)

            response.stream_to_file(voice_tmp_path)

            return voice_tmp_path

        # Previously an unknown type fell through and returned None without a
        # trace; log it so a misconfigured "type" is diagnosable.
        logging.error(f'OpenAI_TTS不支持的类型: {tts_type}')
        return None
    except Exception as e:
        logging.error(f'OpenAI_TTS请求失败: {e}')
        return None
29 changes: 29 additions & 0 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,12 @@ def common_textarea_handle(content):
config_data["vall_e_x"]["accent"] = select_vall_e_x_accent.value
config_data["vall_e_x"]["voice_preset"] = input_vall_e_x_voice_preset.value
config_data["vall_e_x"]["voice_preset_file_path"] = input_vall_e_x_voice_preset_file_path.value

config_data["openai_tts"]["type"] = select_openai_tts_type.value
config_data["openai_tts"]["api_ip_port"] = input_openai_tts_api_ip_port.value
config_data["openai_tts"]["model"] = select_openai_tts_model.value
config_data["openai_tts"]["voice"] = select_openai_tts_voice.value
config_data["openai_tts"]["api_key"] = input_openai_tts_api_key.value

"""
SVC
Expand Down Expand Up @@ -740,6 +746,7 @@ def common_textarea_handle(content):
'genshinvoice_top': 'genshinvoice_top',
'bark_gui': 'bark_gui',
'vall_e_x': 'VALL-E-X',
'openai_tts': 'OpenAI TTS',
},
value=config.get("audio_synthesis_type")
).style("width:200px;")
Expand Down Expand Up @@ -1377,6 +1384,28 @@ def common_textarea_handle(content):

input_vall_e_x_voice_preset = ui.input(label='voice preset', placeholder='VALL-E-X说话人预设名(Prompt name)', value=config.get("vall_e_x", "voice_preset")).style("width:300px;")
input_vall_e_x_voice_preset_file_path = ui.input(label='voice_preset_file_path', placeholder='VALL-E-X说话人预设文件路径(npz)', value=config.get("vall_e_x", "voice_preset_file_path")).style("width:300px;")
with ui.card().style("margin:10px 0px;background: linear-gradient(45deg, #3494E6, #EC6EAD);"):
ui.label("OpenAI TTS")
with ui.row():
select_openai_tts_type = ui.select(
label='类型',
options={'api': 'api', 'huggingface': 'huggingface'},
value=config.get("openai_tts", "type")
).style("width:200px;")
input_openai_tts_api_ip_port = ui.input(label='API地址', value=config.get("openai_tts", "api_ip_port"), placeholder='huggingface上对应项目的API地址').style("width:200px;")
with ui.row():
select_openai_tts_model = ui.select(
label='模型',
options={'tts-1': 'tts-1', 'tts-1-hd': 'tts-1-hd'},
value=config.get("openai_tts", "model")
).style("width:200px;")
select_openai_tts_voice = ui.select(
label='说话人',
options={'alloy': 'alloy', 'echo': 'echo', 'fable': 'fable', 'onyx': 'onyx', 'nova': 'nova', 'shimmer': 'shimmer'},
value=config.get("openai_tts", "voice")
).style("width:200px;")
input_openai_tts_api_key = ui.input(label='api key', value=config.get("openai_tts", "api_key"), placeholder='OpenAI API KEY').style("width:200px;")

with ui.tab_panel(svc_page).style("background: linear-gradient(45deg, #3494E6, #EC6EAD);"):
with ui.card().style("margin:10px 0px;background: linear-gradient(45deg, #3494E6, #EC6EAD);"):
ui.label("DDSP-SVC")
Expand Down

0 comments on commit 525e703

Please sign in to comment.