Skip to content

Commit

Permalink
接入clone-voice(新TTS)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Feb 9, 2024
1 parent db53421 commit ff8a5af
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 7 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

`Luna AI` 的外观由 `Live2D、Vtube Studio、xuniren 和 UE5 结合 Audio2Face` 技术打造,为用户提供了一个生动、互动的虚拟形象。这使得 `Luna AI` 能够在各大直播平台,如 `Bilibili、抖音、快手、微信视频号、斗鱼、YouTube、Twitch 和 TikTok`,进行实时互动直播。当然,它也可以在本地环境中与您进行个性化对话。

为了使交流更加自然,`Luna AI` 使用了先进的自然语言处理技术,结合文本转语音系统,如 `Edge-TTS、VITS-Fast、elevenlabs、bark-gui、VALL-E-X、睿声AI、genshinvoice.top、tts.ai-lab.top、OpenVoice 和 GPT_SoVITS`。这不仅让它能够生成流畅的回答,还可以通过 `so-vits-svc 和 DDSP-SVC` 实现声音的变化,以适应不同的场景和角色。
为了使交流更加自然,`Luna AI` 使用了先进的自然语言处理技术,结合文本转语音系统,如 `Edge-TTS、VITS-Fast、elevenlabs、bark-gui、VALL-E-X、睿声AI、genshinvoice.top、tts.ai-lab.top、OpenVoice、GPT_SoVITS 和 clone-voice`。这不仅让它能够生成流畅的回答,还可以通过 `so-vits-svc 和 DDSP-SVC` 实现声音的变化,以适应不同的场景和角色。

此外,`Luna AI` 还能够通过特定指令与 `Stable Diffusion` 协作,展示画作。用户还可以自定义文案,让 Luna AI 循环播放,以满足不同场合的需求。

Expand Down
12 changes: 10 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@
"voiceId": "b4b885c3-89a7-46d4-badb-015a55bb3a91"
},
"gradio_tts": {
"request_parameters": "{{\"url\": \"https://v2.genshinvoice.top/\", \"fn_index\": 0, \"data_analysis\": 1, \"text_input\": \"{content}\", \"speaker_option\": \"派蒙_ZH\", \"sdp_ratio\": 0.5, \"noise\": 0.6, \"noise_w\": 0.9, \"length\": 1, \"language\": \"ZH\", \"audio_prompt_url\": null, \"text_prompt\": \"Happy\", \"prompt_mode\": \"Text prompt\", \"auxiliary_text\": \"\", \"weight\": 0.7}}"
"request_parameters": "{{\"url\": \"https://xzjosh-nana7mi-bert-vits2.hf.space/--replicas/b9be4/\", \"fn_index\": 0, \"data_analysis\": 1, \"text_input\": \"{content}\", \"speaker_option\": \"Nana7mi\", \"sdp_ratio\": 0.5, \"noise\": 0.6, \"noise_w\": 0.9, \"length\": 1}}"
},
"gpt_sovits": {
"type": "gradio",
Expand All @@ -509,6 +509,13 @@
"emotion": "正常"
}
},
"clone_voice": {
"type": "tts",
"api_ip_port": "http://127.0.0.1:9988",
"voice": "cn-nan.wav",
"language": "zh-cn",
"speed": 1
},
"choose_song": {
"enable": true,
"similarity": 0.5,
Expand Down Expand Up @@ -1240,7 +1247,8 @@
"openai_tts": true,
"reecho_ai": true,
"gradio_tts": true,
"gpt_sovits": true
"gpt_sovits": true,
"clone_voice": true
},
"svc": {
"ddsp_svc": true,
Expand Down
12 changes: 10 additions & 2 deletions config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@
"voiceId": "b4b885c3-89a7-46d4-badb-015a55bb3a91"
},
"gradio_tts": {
"request_parameters": "{{\"url\": \"https://v2.genshinvoice.top/\", \"fn_index\": 0, \"data_analysis\": 1, \"text_input\": \"{content}\", \"speaker_option\": \"派蒙_ZH\", \"sdp_ratio\": 0.5, \"noise\": 0.6, \"noise_w\": 0.9, \"length\": 1, \"language\": \"ZH\", \"audio_prompt_url\": null, \"text_prompt\": \"Happy\", \"prompt_mode\": \"Text prompt\", \"auxiliary_text\": \"\", \"weight\": 0.7}}"
"request_parameters": "{{\"url\": \"https://xzjosh-nana7mi-bert-vits2.hf.space/--replicas/b9be4/\", \"fn_index\": 0, \"data_analysis\": 1, \"text_input\": \"{content}\", \"speaker_option\": \"Nana7mi\", \"sdp_ratio\": 0.5, \"noise\": 0.6, \"noise_w\": 0.9, \"length\": 1}}"
},
"gpt_sovits": {
"type": "gradio",
Expand All @@ -509,6 +509,13 @@
"emotion": "正常"
}
},
"clone_voice": {
"type": "tts",
"api_ip_port": "http://127.0.0.1:9988",
"voice": "cn-nan.wav",
"language": "zh-cn",
"speed": 1
},
"choose_song": {
"enable": true,
"similarity": 0.5,
Expand Down Expand Up @@ -1240,7 +1247,8 @@
"openai_tts": true,
"reecho_ai": true,
"gradio_tts": true,
"gpt_sovits": true
"gpt_sovits": true,
"clone_voice": true
},
"svc": {
"ddsp_svc": true,
Expand Down
40 changes: 40 additions & 0 deletions tests/test_clone_voice/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json, logging, asyncio
import aiohttp, requests, ssl
from urllib.parse import urlencode
import traceback
from urllib.parse import urljoin

async def clone_voice_api(text):
    """Ask a local clone-voice server to synthesize ``text``.

    Posts a form request to ``http://127.0.0.1:9988/tts`` with a fixed
    voice, language and speed, and prints the request and the reply.

    Args:
        text: The text to synthesize.

    Returns:
        The ``filename`` value of the JSON reply, or ``None`` when the
        request fails or the reply has no ``filename``.
    """
    url = 'http://127.0.0.1:9988/tts'

    # Form fields sent to the server, e.g.
    # voice=cn-nan.wav&text=%E4%BD%A0%E5%A5%BD&language=zh-cn&speed=1
    params = {
        "voice": "cn-nan.wav",
        "language": "zh-cn",
        'speed': 1,
        'text': text
    }

    print(f"params={params}")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data=params) as response:
                ret = await response.json()
                print(ret)

        # A reply without "filename" is treated as a failed synthesis and
        # reported with the server's own reply, instead of surfacing as an
        # opaque KeyError in the generic handler below.
        file_path = ret.get("filename") if isinstance(ret, dict) else None
        if file_path is None:
            logging.error(f'clone_voice合成失败,接口返回: {ret}')
            return None

        return file_path

    except aiohttp.ClientError as e:
        logging.error(traceback.format_exc())
        logging.error(f'clone_voice请求失败: {e}')
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error(f'clone_voice未知错误: {e}')

    return None


if __name__ == "__main__":
    # Manual smoke test: clone_voice_api posts to http://127.0.0.1:9988/tts,
    # so only fire the request when this file is run as a script, not when
    # it is imported.
    asyncio.run(clone_voice_api("你好"))
26 changes: 25 additions & 1 deletion utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,17 @@ async def voice_change_and_put_to_queue(message, voice_tmp_path):
}

voice_tmp_path = await self.my_tts.gpt_sovits_api(data)
elif message["tts_type"] == "clone_voice":
data = {
"type": message["data"]["type"],
"api_ip_port": message["data"]["api_ip_port"],
"voice": message["data"]["voice"],
"language": message["data"]["language"],
"speed": message["data"]["speed"],
"content": message["content"]
}

voice_tmp_path = await self.my_tts.clone_voice_api(data)
elif message["tts_type"] == "none":
pass
except Exception as e:
Expand Down Expand Up @@ -1497,7 +1508,20 @@ async def voice_change_and_put_to_queue(voice_tmp_path):

# 调用接口合成语音
voice_tmp_path = await self.my_tts.gpt_sovits_api(content)


elif audio_synthesis_type == "clone_voice":
data = {
"type": self.config.get("clone_voice", "type"),
"api_ip_port": self.config.get("clone_voice", "api_ip_port"),
"voice": self.config.get("clone_voice", "voice"),
"language": self.config.get("clone_voice", "language"),
"speed": self.config.get("clone_voice", "speed"),
"content": content
}

# 调用接口合成语音
voice_tmp_path = await self.my_tts.clone_voice_api(content)

if voice_tmp_path is None:
raise Exception(f"{audio_synthesis_type}合成失败")

Expand Down
33 changes: 33 additions & 0 deletions utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,3 +667,36 @@ async def websocket_client_logic(websocket, data_json):
logging.error(f'gpt_sovits未知错误,请检查您的gpt_sovits推理是否启动/配置是否正确,报错内容: {e}')

return None


async def clone_voice_api(self, data):
    """Synthesize speech through a clone-voice server's ``/tts`` endpoint.

    Args:
        data (dict): Request settings. The keys read here are
            ``api_ip_port`` (base URL of the server), ``voice``,
            ``language``, ``speed`` and ``content`` (the text to speak).

    Returns:
        The ``filename`` value of the server's JSON reply, or ``None``
        when the request fails or the reply has no ``filename``.

    Raises:
        KeyError: If ``data`` lacks one of the keys listed above (they are
            read before the guarded request section).
    """
    API_URL = urljoin(data["api_ip_port"], '/tts')

    # Form fields sent to the server, e.g.
    # voice=cn-nan.wav&text=%E4%BD%A0%E5%A5%BD&language=zh-cn&speed=1
    params = {
        "voice": data["voice"],
        "language": data["language"],
        "speed": data["speed"],
        "text": data["content"]
    }

    logging.debug(f"params={params}")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(API_URL, data=params) as response:
                ret = await response.json()
                logging.debug(ret)

        # A reply without "filename" is treated as a failed synthesis and
        # reported with the server's own reply, instead of surfacing as an
        # opaque KeyError in the generic handler below.
        file_path = ret.get("filename") if isinstance(ret, dict) else None
        if file_path is None:
            logging.error(f'clone_voice合成失败,接口返回: {ret}')
            return None

        return file_path

    except aiohttp.ClientError as e:
        logging.error(traceback.format_exc())
        logging.error(f'clone_voice请求失败: {e}')
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error(f'clone_voice未知错误: {e}')

    return None
34 changes: 33 additions & 1 deletion webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,14 @@ def common_textarea_handle(content):
config_data["gpt_sovits"]["webtts"]["lang"] = select_gpt_sovits_webtts_lang.value
config_data["gpt_sovits"]["webtts"]["speed"] = input_gpt_sovits_webtts_speed.value
config_data["gpt_sovits"]["webtts"]["emotion"] = input_gpt_sovits_webtts_emotion.value


if config.get("webui", "show_card", "tts", "clone_voice"):
config_data["clone_voice"]["type"] = select_clone_voice_type.value
config_data["clone_voice"]["api_ip_port"] = input_clone_voice_api_ip_port.value
config_data["clone_voice"]["voice"] = input_clone_voice_voice.value
config_data["clone_voice"]["language"] = select_clone_voice_language.value
config_data["clone_voice"]["speed"] = float(input_clone_voice_speed.value)

"""
SVC
"""
Expand Down Expand Up @@ -1541,6 +1548,7 @@ def common_textarea_handle(content):
config_data["webui"]["show_card"]["tts"]["reecho_ai"] = switch_webui_show_card_tts_reecho_ai.value
config_data["webui"]["show_card"]["tts"]["gradio_tts"] = switch_webui_show_card_tts_gradio_tts.value
config_data["webui"]["show_card"]["tts"]["gpt_sovits"] = switch_webui_show_card_tts_gpt_sovits.value
config_data["webui"]["show_card"]["tts"]["clone_voice"] = switch_webui_show_card_tts_clone_voice.value

config_data["webui"]["show_card"]["svc"]["ddsp_svc"] = switch_webui_show_card_svc_ddsp_svc.value
config_data["webui"]["show_card"]["svc"]["so_vits_svc"] = switch_webui_show_card_svc_so_vits_svc.value
Expand Down Expand Up @@ -1694,6 +1702,7 @@ def common_textarea_handle(content):
'reecho_ai': '睿声AI',
'gradio_tts': 'Gradio',
'gpt_sovits': 'GPT_SoVITS',
'clone_voice': 'clone-voice'
},
value=config.get("audio_synthesis_type")
).style("width:200px;")
Expand Down Expand Up @@ -2902,6 +2911,27 @@ def common_textarea_handle(content):
).style("width:200px;")
input_gpt_sovits_webtts_speed = ui.input(label='语速', value=config.get("gpt_sovits", "webtts", "speed"), placeholder='语速').style("width:200px;")
input_gpt_sovits_webtts_emotion = ui.input(label='情感', value=config.get("gpt_sovits", "webtts", "emotion"), placeholder='情感').style("width:200px;")

if config.get("webui", "show_card", "tts", "clone_voice"):
with ui.card().style(card_css):
ui.label("clone-voice")
with ui.row():
select_clone_voice_type = ui.select(
label='API接口类型',
options={'tts':'tts'},
value=config.get("clone_voice", "type")
).style("width:100px;")
input_clone_voice_api_ip_port = ui.input(label='API地址', value=config.get("clone_voice", "api_ip_port"), placeholder='官方程序启动后监听的地址').style("width:200px;")
with ui.row():
input_clone_voice_voice = ui.input(label='参考音频路径', value=config.get("clone_voice", "voice"), placeholder='参考音频路径,建议填绝对路径').style("width:200px;")
select_clone_voice_language = ui.select(
label='需要合成的语种',
options={'zh-cn':'中文', 'ja':'日文', 'en':'英文',"ko":'ko',"es":'es',"de":'de',
"fr":'fr',"it":'it',"tr":'tr',"ru":'ru',"pt":'pt',"pl":'pl',"nl":'nl',"ar":'ar',"hu":'hu',"cs":'cs'},
value=config.get("clone_voice", "language")
).style("width:200px;")
input_clone_voice_speed = ui.input(label='语速', value=config.get("clone_voice", "speed"), placeholder='语速').style("width:100px;")

with ui.tab_panel(svc_page).style(tab_panel_css):
if config.get("webui", "show_card", "svc", "ddsp_svc"):
with ui.card().style(card_css):
Expand Down Expand Up @@ -3472,6 +3502,8 @@ def update_echart_gift():
switch_webui_show_card_tts_reecho_ai = ui.switch('reecho_ai', value=config.get("webui", "show_card", "tts", "reecho_ai")).style(switch_internal_css)
switch_webui_show_card_tts_gradio_tts = ui.switch('gradio', value=config.get("webui", "show_card", "tts", "gradio_tts")).style(switch_internal_css)
switch_webui_show_card_tts_gpt_sovits = ui.switch('gpt_sovits', value=config.get("webui", "show_card", "tts", "gpt_sovits")).style(switch_internal_css)
switch_webui_show_card_tts_clone_voice = ui.switch('clone_voice', value=config.get("webui", "show_card", "tts", "clone_voice")).style(switch_internal_css)

with ui.card().style(card_css):
ui.label("变声")
with ui.row():
Expand Down

0 comments on commit ff8a5af

Please sign in to comment.