Skip to content

Commit

Permalink
Merge pull request Ikaros-521#996 from Ikaros-521/owner
Browse files Browse the repository at this point in the history
新增:对接CosyVoice 0819的API
  • Loading branch information
Ikaros-521 authored Sep 12, 2024
2 parents b39524f + 78fe45d commit a19cb81
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 2 deletions.
7 changes: 7 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -894,13 +894,20 @@
"cosyvoice": {
"type": "gradio_0707",
"gradio_ip_port": "http://127.0.0.1:9886",
"api_ip_port": "http://127.0.0.1:9880",
"gradio_0707": {
"mode_checkbox_group": "预训练音色",
"sft_dropdown": "中文女",
"prompt_text": "",
"prompt_wav_upload": "",
"instruct_text": "",
"seed": 0
},
"api_0819": {
"speaker": "中文女",
"new": 0,
"speed": 1.0,
"streaming": 0
}
},
"choose_song": {
Expand Down
7 changes: 7 additions & 0 deletions config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -894,13 +894,20 @@
"cosyvoice": {
"type": "gradio_0707",
"gradio_ip_port": "http://127.0.0.1:9886",
"api_ip_port": "http://127.0.0.1:9880",
"gradio_0707": {
"mode_checkbox_group": "预训练音色",
"sft_dropdown": "中文女",
"prompt_text": "",
"prompt_wav_upload": "",
"instruct_text": "",
"seed": 0
},
"api_0819": {
"speaker": "中文女",
"new": 0,
"speed": 1.0,
"streaming": 0
}
},
"choose_song": {
Expand Down
61 changes: 61 additions & 0 deletions tests/test_cosyvoice/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import json, logging, asyncio
import aiohttp, requests, ssl
from urllib.parse import urlencode
import traceback
from urllib.parse import urljoin

async def download_audio(type: str, file_url: str, timeout: int = 30, request_type: str = "get", data=None, json_data=None, audio_suffix: str = "wav"):
    """Fetch audio bytes over HTTP and save them to a file in the working directory.

    Args:
        type: Label of the calling TTS backend; only used in log messages.
            (The name shadows the builtin but is kept for caller compatibility.)
        file_url: URL to request.
        timeout: Total request timeout in seconds.
        request_type: ``"get"`` sends ``data`` as query parameters; any other
            value issues a POST with ``data`` as the body and ``json_data`` as
            the JSON payload.
        data: Query parameters (GET) or request body (POST).
        json_data: JSON payload, only used for POST requests.
        audio_suffix: Extension (without the dot) of the saved file. The
            default ``"wav"`` yields ``1.wav``.

    Returns:
        The path of the written file, or ``None`` when the server answers with
        a non-200 status or the request times out.
    """
    # An explicit ClientTimeout is the documented way to set a total timeout;
    # it has the same meaning as the bare number of seconds used before.
    client_timeout = aiohttp.ClientTimeout(total=timeout)

    async with aiohttp.ClientSession() as session:
        try:
            # Only the way the request is built differs between GET and POST;
            # the response handling below is shared.
            if request_type == "get":
                request = session.get(file_url, params=data, timeout=client_timeout)
            else:
                request = session.post(file_url, data=data, json=json_data, timeout=client_timeout)

            async with request as response:
                if response.status != 200:
                    logging.error(f'{type} 下载音频失败: {response.status}')
                    return None
                content = await response.read()
        except asyncio.TimeoutError:
            # The original message lacked the f-prefix and logged "{type}" verbatim.
            logging.error(f"{type} 下载音频超时")
            return None

    # Honour the requested suffix instead of always writing "1.wav".
    voice_tmp_path = f'1.{audio_suffix}'
    with open(voice_tmp_path, 'wb') as file:
        file.write(content)
    return voice_tmp_path

async def cosyvoice_api(text):
    """Request speech for ``text`` from a local CosyVoice HTTP API.

    The endpoint address and the synthesis options are hard-coded. The request
    is sent as a JSON POST through ``download_audio``.

    Args:
        text: The text to synthesize.

    Returns:
        Whatever ``download_audio`` returns (the saved file path, or ``None``),
        or ``None`` if any exception is raised along the way.
    """
    endpoint = 'http://127.0.0.1:9880/'

    # Key order is kept identical to the original payload.
    payload = dict(
        text=text,
        speaker="中文女",
        new=0,
        speed=1.0,
        streaming=0,
    )

    logging.debug(f"params={payload}")

    try:
        saved_path = await download_audio(
            "cosyvoice",
            endpoint,
            30,
            request_type="post",
            json_data=payload,
        )
        print(saved_path)
    except Exception as e:
        # Log the full traceback first, then a short summary line.
        logging.error(traceback.format_exc())
        logging.error(f'cosyvoice未知错误: {e}')
        return None
    else:
        return saved_path


if __name__ == "__main__":
    # Manual smoke test: synthesize a short greeting against the local API.
    _demo_request = cosyvoice_api("你好")
    asyncio.run(_demo_request)
4 changes: 4 additions & 0 deletions utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,9 @@ async def tts_handle(self, message):
data = {
"type": message["data"]["type"],
"gradio_ip_port": message["data"]["gradio_ip_port"],
"api_ip_port": message["data"]["api_ip_port"],
"gradio_0707": message["data"]["gradio_0707"],
"api_0819": message["data"]["api_0819"],
"content": message["content"],
}

Expand Down Expand Up @@ -2084,7 +2086,9 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type="
data = {
"type": self.config.get("cosyvoice", "type"),
"gradio_ip_port": self.config.get("cosyvoice", "gradio_ip_port"),
"api_ip_port": self.config.get("cosyvoice", "api_ip_port"),
"gradio_0707": self.config.get("cosyvoice", "gradio_0707"),
"api_0819": self.config.get("cosyvoice", "api_0819"),
"content": content
}
# 调用接口合成语音
Expand Down
20 changes: 20 additions & 0 deletions utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,26 @@ async def cosyvoice_api(self, data):
new_file_path = self.common.move_file(voice_tmp_path, os.path.join(self.audio_out_path, 'cosyvoice_' + self.common.get_bj_time(4)), 'cosyvoice_' + self.common.get_bj_time(4))

return new_file_path
elif data["type"] == "api_0819":
url = data["api_ip_port"]

params = {
"text": data["content"],
"speaker": data["api_0819"]["speaker"],
'new': int(data["api_0819"]["new"]),
'speed': float(data["api_0819"]["speed"]),
'streaming': int(data["api_0819"]["streaming"])
}

logger.debug(f"params={params}")

try:
return await self.download_audio("cosyvoice", url, self.timeout, request_type="post", json_data=params)
except Exception as e:
logger.error(traceback.format_exc())
logger.error(f'cosyvoice未知错误,请检查您的CosyVoice API是否启动/配置是否正确,报错内容: {e}')

return None
except Exception as e:
logger.error(traceback.format_exc())
logger.error(f'CosyVoice未知错误,请检查您的CosyVoice WebUI是否启动/配置是否正确,报错内容: {e}')
Expand Down
25 changes: 23 additions & 2 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2518,13 +2518,18 @@ def common_textarea_handle(content):
if config.get("webui", "show_card", "tts", "cosyvoice"):
config_data["cosyvoice"]["type"] = select_cosyvoice_type.value
config_data["cosyvoice"]["gradio_ip_port"] = input_cosyvoice_gradio_ip_port.value
config_data["cosyvoice"]["api_ip_port"] = input_cosyvoice_api_ip_port.value
config_data["cosyvoice"]["gradio_0707"]["mode_checkbox_group"] = select_cosyvoice_gradio_0707_mode_checkbox_group.value
config_data["cosyvoice"]["gradio_0707"]["sft_dropdown"] = select_cosyvoice_gradio_0707_sft_dropdown.value
config_data["cosyvoice"]["gradio_0707"]["prompt_text"] = input_cosyvoice_gradio_0707_prompt_text.value
config_data["cosyvoice"]["gradio_0707"]["prompt_wav_upload"] = input_cosyvoice_gradio_0707_prompt_wav_upload.value
config_data["cosyvoice"]["gradio_0707"]["instruct_text"] = input_cosyvoice_gradio_0707_instruct_text.value
config_data["cosyvoice"]["gradio_0707"]["seed"] = int(input_cosyvoice_gradio_0707_seed.value)

config_data["cosyvoice"]["api_0819"]["speaker"] = input_cosyvoice_api_0819_speaker.value
config_data["cosyvoice"]["api_0819"]["new"] = int(input_cosyvoice_api_0819_new.value)
config_data["cosyvoice"]["api_0819"]["speed"] = round(float(input_cosyvoice_api_0819_speed.value), 2)

"""
SVC
"""
Expand Down Expand Up @@ -5582,7 +5587,7 @@ async def fish_speech_load_model(data):
with ui.row():
select_cosyvoice_type = ui.select(
label='类型',
options={"gradio_0707": "gradio_0707"},
options={"api_0819": "api_0819", "gradio_0707": "gradio_0707"},
value=config.get("cosyvoice", "type")
).style("width:150px").tooltip("对接的API类型")
input_cosyvoice_gradio_ip_port = ui.input(
Expand All @@ -5593,9 +5598,18 @@ async def fish_speech_load_model(data):
'请输入正确格式的URL': lambda value: common.is_url_check(value),
}
).style("width:200px;").tooltip("对接webui的gradio接口,填webui的地址")
input_cosyvoice_api_ip_port = ui.input(
label='HTTP API地址',
value=config.get("cosyvoice", "api_ip_port"),
placeholder='API程序启动后,API请求地址',
validation={
'请输入正确格式的URL': lambda value: common.is_url_check(value),
}
).style("width:200px;").tooltip("对接api接口,填api端点地址")

with ui.row():
with ui.card().style(card_css):
ui.label("gradio_0707")
with ui.row():
select_cosyvoice_gradio_0707_mode_checkbox_group = ui.select(
label='推理模式',
Expand All @@ -5611,7 +5625,14 @@ async def fish_speech_load_model(data):
input_cosyvoice_gradio_0707_prompt_wav_upload = ui.input(label='prompt音频路径', value=config.get("cosyvoice", "gradio_0707", "prompt_wav_upload"), placeholder='例如:E:\\1.wav').style("width:200px;").tooltip("不用就留空,例如:E:\\1.wav")
input_cosyvoice_gradio_0707_instruct_text = ui.input(label='instruct文本', value=config.get("cosyvoice", "gradio_0707", "instruct_text"), placeholder='').style("width:200px;").tooltip("不用就留空")
input_cosyvoice_gradio_0707_seed = ui.input(label='随机推理种子', value=config.get("cosyvoice", "gradio_0707", "seed"), placeholder='默认:0').style("width:100px;").tooltip("随机推理种子")

with ui.row():
with ui.card().style(card_css):
ui.label("api_0819")
with ui.row():
input_cosyvoice_api_0819_speaker = ui.input(label='说话人', value=config.get("cosyvoice", "api_0819", "speaker"), placeholder='').style("width:200px;").tooltip("自行查看")
input_cosyvoice_api_0819_new = ui.input(label='new', value=config.get("cosyvoice", "api_0819", "new"), placeholder='0').style("width:200px;").tooltip("自行查看")
input_cosyvoice_api_0819_speed = ui.input(label='语速', value=config.get("cosyvoice", "api_0819", "speed"), placeholder='1').style("width:200px;").tooltip("语速")

with ui.tab_panel(svc_page).style(tab_panel_css):
if config.get("webui", "show_card", "svc", "ddsp_svc"):
with ui.card().style(card_css):
Expand Down

0 comments on commit a19cb81

Please sign in to comment.