Skip to content

Commit

Permalink
对接CosyVoice
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Jul 8, 2024
1 parent c06aaa0 commit 60e43e1
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

`Luna AI` 的外观由 `Live2D、Vtube Studio、xuniren、UE5 结合 Audio2Face、EasyAIVtuber、数字人视频播放器(Easy-Wav2Lip、Sadtalker、GeneFace++、MuseTalk、本地视频)、metahuman-stream(ernerf、musetalk、wav2lip)` 技术打造,为用户提供了一个生动、互动的虚拟形象。这使得 `Luna AI` 能够在各大直播平台,如 `Bilibili、抖音、快手、微信视频号、拼多多、1688、斗鱼、YouTube、Twitch 和 TikTok`,进行实时互动直播。当然,它也可以在本地环境中与您进行个性化对话。

为了使交流更加自然,`Luna AI` 使用了先进的自然语言处理技术,结合文本转语音系统,如 `Edge-TTS、VITS-Fast、elevenlabs、bark-gui、VALL-E-X、睿声AI、genshinvoice.top、tts.ai-lab.top、OpenVoice、GPT_SoVITS、clone-voice、Azure TTS、fish-speech、ChatTTS`。这不仅让它能够生成流畅的回答,还可以通过 `so-vits-svc 和 DDSP-SVC` 实现声音的变化,以适应不同的场景和角色。
为了使交流更加自然,`Luna AI` 使用了先进的自然语言处理技术,结合文本转语音系统,如 `Edge-TTS、VITS-Fast、elevenlabs、bark-gui、VALL-E-X、睿声AI、genshinvoice.top、tts.ai-lab.top、OpenVoice、GPT_SoVITS、clone-voice、Azure TTS、fish-speech、ChatTTS、CosyVoice`。这不仅让它能够生成流畅的回答,还可以通过 `so-vits-svc 和 DDSP-SVC` 实现声音的变化,以适应不同的场景和角色。

此外,`Luna AI` 还能够通过特定指令与 `Stable Diffusion` 协作,展示画作。用户还可以自定义文案,让 Luna AI 循环播放,以满足不同场合的需求。

Expand Down
15 changes: 14 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,18 @@
"streaming": 0
}
},
"cosyvoice": {
"type": "gradio_0707",
"gradio_ip_port": "http://127.0.0.1:9886",
"gradio_0707": {
"mode_checkbox_group": "预训练音色",
"sft_dropdown": "中文女",
"prompt_text": "",
"prompt_wav_upload": "",
"instruct_text": "",
"seed": 0
}
},
"choose_song": {
"enable": false,
"similarity": 0.5,
Expand Down Expand Up @@ -1846,7 +1858,8 @@
"clone_voice": true,
"azure_tts": true,
"fish_speech": true,
"chattts": true
"chattts": true,
"cosyvoice": true
},
"svc": {
"ddsp_svc": true,
Expand Down
15 changes: 14 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,18 @@
"streaming": 0
}
},
"cosyvoice": {
"type": "gradio_0707",
"gradio_ip_port": "http://127.0.0.1:9886",
"gradio_0707": {
"mode_checkbox_group": "预训练音色",
"sft_dropdown": "中文女",
"prompt_text": "",
"prompt_wav_upload": "",
"instruct_text": "",
"seed": 0
}
},
"choose_song": {
"enable": false,
"similarity": 0.5,
Expand Down Expand Up @@ -1846,7 +1858,8 @@
"clone_voice": true,
"azure_tts": true,
"fish_speech": true,
"chattts": true
"chattts": true,
"cosyvoice": true
},
"svc": {
"ddsp_svc": true,
Expand Down
Binary file modified docs/AI Vtuber.xmind
Binary file not shown.
Binary file modified docs/xmind.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions tests/test_cosyvoice/gradio_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from gradio_client import Client, file

# Manual smoke test: ask a CosyVoice Gradio WebUI listening on localhost to
# synthesize one short sentence, then print whatever the endpoint returns.
COSYVOICE_URL = "http://127.0.0.1:9886/"

# Keyword arguments forwarded to the "/generate_audio" endpoint.
# To send a reference clip instead of None, wrap its path or URL with
# file(...) and pass it as prompt_wav_upload / prompt_wav_record, e.g.
# file('https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav').
request_kwargs = {
    "tts_text": "合成能力。",
    "mode_checkbox_group": "预训练音色",
    "sft_dropdown": "中文女",
    "prompt_text": "",
    "prompt_wav_upload": None,
    "prompt_wav_record": None,
    "instruct_text": "",
    "seed": 0,
}

client = Client(COSYVOICE_URL)
result = client.predict(api_name="/generate_audio", **request_kwargs)
print(result)
20 changes: 20 additions & 0 deletions utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,17 @@ async def tts_handle(self, message):
}

voice_tmp_path = await self.my_tts.chattts_api(data)
elif message["tts_type"] == "cosyvoice":
logger.info(message)
data = {
"type": message["data"]["type"],
"gradio_ip_port": message["data"]["gradio_ip_port"],
"gradio_0707": message["data"]["gradio_0707"],
"content": message["content"],
}

voice_tmp_path = await self.my_tts.cosyvoice_api(data)

elif message["tts_type"] == "none":
# Audio.voice_tmp_path_queue.put(message)
voice_tmp_path = None
Expand Down Expand Up @@ -2004,6 +2015,15 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type="
}
# 调用接口合成语音
voice_tmp_path = await self.my_tts.chattts_api(data)
elif audio_synthesis_type == "cosyvoice":
data = {
"type": self.config.get("cosyvoice", "type"),
"gradio_ip_port": self.config.get("cosyvoice", "gradio_ip_port"),
"gradio_0707": self.config.get("cosyvoice", "gradio_0707"),
"content": content
}
# 调用接口合成语音
voice_tmp_path = await self.my_tts.cosyvoice_api(data)

return voice_tmp_path

Expand Down
47 changes: 47 additions & 0 deletions utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,3 +1202,50 @@ async def chattts_api(self, data):
logger.error(f'ChatTTS未知错误,请检查您的ChatTTS WebUI是否启动/配置是否正确,报错内容: {e}')

return None

# CosyVoice (gradio_client-0.16.4,版本太低没法用喵)
async def cosyvoice_api(self, data):
    """Synthesize speech through a CosyVoice Gradio WebUI and store the result.

    Args:
        data (dict): Request parameters. Keys read by this method:
            "type": API flavour; only "gradio_0707" is handled.
            "gradio_ip_port": address passed to gradio_client.Client.
            "gradio_0707": dict providing mode_checkbox_group, sft_dropdown,
                prompt_text, prompt_wav_upload, instruct_text and seed.
            "content": text to synthesize.

    Returns:
        Whatever self.common.move_file returns for the generated audio
        (presumably the new file path -- confirm against move_file), or
        None when the type is unsupported, the endpoint returns nothing,
        or any error occurs. Errors are logged, never raised.
    """
    try:
        api_type = data["type"]
        if api_type != "gradio_0707":
            # Report the real cause instead of relying on an unbound-variable
            # error being swallowed by the generic handler below.
            logger.error(f'CosyVoice不支持的API类型: {api_type}')
            return None

        settings = data["gradio_0707"]

        # Imported lazily so the dependency is only needed when CosyVoice is used.
        from gradio_client import Client, file

        client = Client(data["gradio_ip_port"])

        # An empty (or missing-value) path means "no reference audio".
        prompt_wav_path = settings["prompt_wav_upload"]
        prompt_wav_upload = file(prompt_wav_path) if prompt_wav_path else None

        # NOTE(review): predict() is called synchronously inside a coroutine,
        # so it presumably blocks the event loop until the WebUI answers.
        result = client.predict(
            # A full stop is always appended to the text -- presumably so the
            # sentence is terminated for the model; confirm before changing.
            tts_text=data["content"] + "。",
            mode_checkbox_group=settings["mode_checkbox_group"],
            sft_dropdown=settings["sft_dropdown"],
            prompt_text=settings["prompt_text"],
            prompt_wav_upload=prompt_wav_upload,
            prompt_wav_record=None,
            instruct_text=settings["instruct_text"],
            seed=int(settings["seed"]),
            api_name="/generate_audio"
        )

        if not result:
            return None

        # Build the name once so the destination path and the rename argument
        # can never disagree if the clock ticks between two calls.
        file_stem = 'cosyvoice_' + self.common.get_bj_time(4)
        return self.common.move_file(
            result,
            os.path.join(self.audio_out_path, file_stem),
            file_stem,
        )
    except Exception as e:
        logger.error(traceback.format_exc())
        logger.error(f'CosyVoice未知错误,请检查您的CosyVoice WebUI是否启动/配置是否正确,报错内容: {e}')

    return None

52 changes: 50 additions & 2 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2378,6 +2378,16 @@ def common_textarea_handle(content):
config_data["chattts"]["api"]["seed"] = int(input_chattts_api_seed.value)
config_data["chattts"]["api"]["media_type"] = input_chattts_api_media_type.value

if config.get("webui", "show_card", "tts", "cosyvoice"):
config_data["cosyvoice"]["type"] = select_cosyvoice_type.value
config_data["cosyvoice"]["gradio_ip_port"] = input_cosyvoice_gradio_ip_port.value
config_data["cosyvoice"]["gradio_0707"]["mode_checkbox_group"] = select_cosyvoice_gradio_0707_mode_checkbox_group.value
config_data["cosyvoice"]["gradio_0707"]["sft_dropdown"] = select_cosyvoice_gradio_0707_sft_dropdown.value
config_data["cosyvoice"]["gradio_0707"]["prompt_text"] = input_cosyvoice_gradio_0707_prompt_text.value
config_data["cosyvoice"]["gradio_0707"]["prompt_wav_upload"] = input_cosyvoice_gradio_0707_prompt_wav_upload.value
config_data["cosyvoice"]["gradio_0707"]["instruct_text"] = input_cosyvoice_gradio_0707_instruct_text.value
config_data["cosyvoice"]["gradio_0707"]["seed"] = int(input_cosyvoice_gradio_0707_seed.value)

"""
SVC
"""
Expand Down Expand Up @@ -2733,7 +2743,8 @@ def common_textarea_handle(content):
config_data["webui"]["show_card"]["tts"]["clone_voice"] = switch_webui_show_card_tts_clone_voice.value
config_data["webui"]["show_card"]["tts"]["azure_tts"] = switch_webui_show_card_tts_azure_tts.value
config_data["webui"]["show_card"]["tts"]["fish_speech"] = switch_webui_show_card_tts_fish_speech.value
config_data["webui"]["show_card"]["tts"]["tts_chattts"] = switch_webui_show_card_tts_chattts.value
config_data["webui"]["show_card"]["tts"]["chattts"] = switch_webui_show_card_tts_chattts.value
config_data["webui"]["show_card"]["tts"]["cosyvoice"] = switch_webui_show_card_tts_cosyvoice.value

config_data["webui"]["show_card"]["svc"]["ddsp_svc"] = switch_webui_show_card_svc_ddsp_svc.value
config_data["webui"]["show_card"]["svc"]["so_vits_svc"] = switch_webui_show_card_svc_so_vits_svc.value
Expand Down Expand Up @@ -2853,6 +2864,7 @@ def save_config():
'azure_tts': 'azure_tts',
'fish_speech': 'fish_speech',
'chattts': 'ChatTTS',
'cosyvoice': 'CosyVoice',
}

# 聊天类型所有配置项
Expand Down Expand Up @@ -5336,7 +5348,42 @@ async def fish_speech_load_model(data):
with ui.row():
input_chattts_api_seed = ui.input(label='声音种子', value=config.get("chattts", "api", "seed"), placeholder='默认:2581').style("width:200px;").tooltip("声音种子")
input_chattts_api_media_type = ui.input(label='音频格式', value=config.get("chattts", "api", "media_type"), placeholder='默认:wav').style("width:200px;").tooltip("音频格式,没事不建议改")

if config.get("webui", "show_card", "tts", "cosyvoice"):
with ui.card().style(card_css):
ui.label("CosyVoice")
with ui.row():
select_cosyvoice_type = ui.select(
label='类型',
options={"gradio_0707": "gradio_0707"},
value=config.get("cosyvoice", "type")
).style("width:150px").tooltip("对接的API类型")
input_cosyvoice_gradio_ip_port = ui.input(
label='Gradio API地址',
value=config.get("cosyvoice", "gradio_ip_port"),
placeholder='官方webui程序启动后gradio监听的地址',
validation={
'请输入正确格式的URL': lambda value: common.is_url_check(value),
}
).style("width:200px;").tooltip("对接webui的gradio接口,填webui的地址")

with ui.row():
with ui.card().style(card_css):
with ui.row():
select_cosyvoice_gradio_0707_mode_checkbox_group = ui.select(
label='推理模式',
options={'预训练音色': '预训练音色', '3s极速复刻': '3s极速复刻', '跨语种复刻': '跨语种复刻', '自然语言控制': '自然语言控制'},
value=config.get("cosyvoice", "gradio_0707", "mode_checkbox_group")
).style("width:200px;")
select_cosyvoice_gradio_0707_sft_dropdown = ui.select(
label='预训练音色',
options={'中文女': '中文女', '中文男': '中文男', '日语男': '日语男', '粤语女': '粤语女', '英文女': '英文女', '英文男': '英文男', '韩语女': '韩语女'},
value=config.get("cosyvoice", "gradio_0707", "sft_dropdown")
).style("width:100px;")
input_cosyvoice_gradio_0707_prompt_text = ui.input(label='prompt文本', value=config.get("cosyvoice", "gradio_0707", "prompt_text"), placeholder='').style("width:200px;").tooltip("不用就留空")
input_cosyvoice_gradio_0707_prompt_wav_upload = ui.input(label='prompt音频路径', value=config.get("cosyvoice", "gradio_0707", "prompt_wav_upload"), placeholder='例如:E:\\1.wav').style("width:200px;").tooltip("不用就留空,例如:E:\\1.wav")
input_cosyvoice_gradio_0707_instruct_text = ui.input(label='instruct文本', value=config.get("cosyvoice", "gradio_0707", "instruct_text"), placeholder='').style("width:200px;").tooltip("不用就留空")
input_cosyvoice_gradio_0707_seed = ui.input(label='随机推理种子', value=config.get("cosyvoice", "gradio_0707", "seed"), placeholder='默认:0').style("width:100px;").tooltip("随机推理种子")

with ui.tab_panel(svc_page).style(tab_panel_css):
if config.get("webui", "show_card", "svc", "ddsp_svc"):
with ui.card().style(card_css):
Expand Down Expand Up @@ -6364,6 +6411,7 @@ def update_echart_gift():
switch_webui_show_card_tts_azure_tts = ui.switch('azure_tts', value=config.get("webui", "show_card", "tts", "azure_tts")).style(switch_internal_css)
switch_webui_show_card_tts_fish_speech = ui.switch('fish_speech', value=config.get("webui", "show_card", "tts", "fish_speech")).style(switch_internal_css)
switch_webui_show_card_tts_chattts = ui.switch('ChatTTS', value=config.get("webui", "show_card", "tts", "chattts")).style(switch_internal_css)
switch_webui_show_card_tts_cosyvoice = ui.switch('CosyVoice', value=config.get("webui", "show_card", "tts", "cosyvoice")).style(switch_internal_css)

with ui.card().style(card_css):
ui.label("变声")
Expand Down

0 comments on commit 60e43e1

Please sign in to comment.