Skip to content

Commit

Permalink
TTS新增VALL-E-X的接入
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Aug 29, 2023
1 parent 05f187a commit 17ec290
Show file tree
Hide file tree
Showing 11 changed files with 265 additions and 13 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,12 @@ for id, (key, name) in enumerate(obj.get_speakers().items()):
训练用整合包(原作者整合包备份):[https://pan.quark.cn/s/da52e78983da](https://pan.quark.cn/s/da52e78983da)
整合包视频教程:[AI声音克隆又进化了,10分钟学会声音克隆!一键启动包发布!](https://www.bilibili.com/video/BV1K94y1k7Bw)

### VALL-E-X
官方仓库:[VALL-E-X](https://github.com/Plachtaa/VALL-E-X)
个人提供的已训练好的preset预设:[点我跳转](https://github.com/Ikaros-521/VALL-E-X/releases/tag/preset)
官方视频讲解:[仅需3秒音频实现声音克隆!我开源了微软的最新语音合成模型VALL-E X](https://www.bilibili.com/video/av617666708)
整合包下载:[https://pan.quark.cn/s/b817e285ab41](https://pan.quark.cn/s/b817e285ab41)

### DDSP-SVC
官方仓库:[DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
个人提供的已训练好的模型:[点我跳转](https://github.com/Ikaros-521/DDSP-SVC/releases)
Expand Down Expand Up @@ -1192,6 +1198,7 @@ cmd运行`npm i docsify-cli -g`
- [x] 支持关闭聊天(LLM等)
- [x] langchain-chatglm的接入
- [ ] Edge-TTS在合成音频时会出现合成成功但是系统找不到文件的bug
- [ ] 抖音关注事件可以触发关注感谢话术

## 📝更新日志

Expand Down Expand Up @@ -1493,6 +1500,8 @@ cmd运行`npm i docsify-cli -g`
- 2023-08-27
- 新增web字幕打印机的接入

- 2023-08-29
- TTS新增VALL-E-X的接入

</details>

Expand Down
11 changes: 10 additions & 1 deletion UI_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def setupUi(self, MainWindow):
self.scrollArea.setWidgetResizable(True)
self.scrollArea.setObjectName("scrollArea")
self.scrollAreaWidgetContents = QtWidgets.QWidget()
self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, -8830, 984, 9572))
self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, -6514, 984, 9648))
self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
self.verticalLayout = QtWidgets.QVBoxLayout(self.scrollAreaWidgetContents)
self.verticalLayout.setContentsMargins(35, 20, 35, 20)
Expand Down Expand Up @@ -1086,6 +1086,14 @@ def setupUi(self, MainWindow):
self.gridLayout_56.addWidget(self.checkBox_bark_gui_quick_generation, 5, 1, 1, 1)
self.gridLayout_57.addLayout(self.gridLayout_56, 0, 0, 1, 1)
self.verticalLayout.addWidget(self.groupBox_bark_gui)
self.groupBox_vall_e_x = QtWidgets.QGroupBox(self.scrollAreaWidgetContents)
self.groupBox_vall_e_x.setObjectName("groupBox_vall_e_x")
self.gridLayout_70 = QtWidgets.QGridLayout(self.groupBox_vall_e_x)
self.gridLayout_70.setObjectName("gridLayout_70")
self.gridLayout_vall_e_x = QtWidgets.QGridLayout()
self.gridLayout_vall_e_x.setObjectName("gridLayout_vall_e_x")
self.gridLayout_70.addLayout(self.gridLayout_vall_e_x, 0, 0, 1, 1)
self.verticalLayout.addWidget(self.groupBox_vall_e_x)
self.groupBox_live2d = QtWidgets.QGroupBox(self.scrollAreaWidgetContents)
self.groupBox_live2d.setObjectName("groupBox_live2d")
self.gridLayout_30 = QtWidgets.QGridLayout(self.groupBox_live2d)
Expand Down Expand Up @@ -2048,6 +2056,7 @@ def retranslateUi(self, MainWindow):
self.label_bark_gui_waveform_temperature.setText(_translate("MainWindow", "波形温度"))
self.label_bark_gui_spk.setText(_translate("MainWindow", "说话人"))
self.checkBox_bark_gui_quick_generation.setText(_translate("MainWindow", "启用"))
self.groupBox_vall_e_x.setTitle(_translate("MainWindow", "VALL-E-X"))
self.groupBox_live2d.setTitle(_translate("MainWindow", "Live2D"))
self.checkBox_live2d_enable.setText(_translate("MainWindow", "是"))
self.label_live2d_port.setText(_translate("MainWindow", "端口"))
Expand Down
11 changes: 9 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
"history_enable": true,
"history_max_len": 500
},
"audio_synthesis_type": "edge-tts",
"audio_synthesis_type": "vall_e_x",
"audio_random_speed": {
"normal": {
"enable": false,
Expand Down Expand Up @@ -200,6 +200,13 @@
"seed": -1.0,
"batch_count": 1
},
"vall_e_x": {
"api_ip_port": "http://127.0.0.1:7860",
"language": "auto-detect",
"accent": "no-accent",
"voice_preset": "ikaros",
"voice_preset_file_path": "D:\\GitHub_pro\\AI-Vtuber\\tests\\test_VALL-E-X\\ikaros.npz"
},
"chatterbot": {
"name": "bot",
"db_path": "db.sqlite3"
Expand Down Expand Up @@ -353,7 +360,7 @@
}
],
"random_play": true,
"play_interval": 5000
"play_interval": 30000
},
"web_captions_printer": {
"enable": false,
Expand Down
7 changes: 7 additions & 0 deletions config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,13 @@
"seed": -1.0,
"batch_count": 1
},
"vall_e_x": {
"api_ip_port": "http://127.0.0.1:7860",
"language": "auto-detect",
"accent": "no-accent",
"voice_preset": "ikaros",
"voice_preset_file_path": "D:\\GitHub_pro\\AI-Vtuber\\tests\\test_VALL-E-X\\ikaros.npz"
},
"chatterbot": {
"name": "bot",
"db_path": "db.sqlite3"
Expand Down
126 changes: 118 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,7 @@ def init_config(self):
self.ui.lineEdit_sparkdesk_api_key.setText(self.sparkdesk_config['api_key'])

self.ui.comboBox_audio_synthesis_type.clear()
self.ui.comboBox_audio_synthesis_type.addItems(["Edge-TTS", "VITS", "VITS-Fast", "elevenlabs", "genshinvoice_top", "bark_gui"])
self.ui.comboBox_audio_synthesis_type.addItems(["Edge-TTS", "VITS", "VITS-Fast", "elevenlabs", "genshinvoice_top", "bark_gui", "VALL-E-X"])
audio_synthesis_type_index = 0
if self.audio_synthesis_type == "edge-tts":
audio_synthesis_type_index = 0
Expand All @@ -955,6 +955,8 @@ def init_config(self):
audio_synthesis_type_index = 4
elif self.audio_synthesis_type == "bark_gui":
audio_synthesis_type_index = 5
elif self.audio_synthesis_type == "vall_e_x":
audio_synthesis_type_index = 6
self.ui.comboBox_audio_synthesis_type.setCurrentIndex(audio_synthesis_type_index)

self.ui.lineEdit_vits_fast_config_path.setText(self.vits_fast_config['config_path'])
Expand Down Expand Up @@ -1115,6 +1117,9 @@ def init_config(self):
talk_google_tgt_lang_index = 2
self.ui.comboBox_talk_google_tgt_lang.setCurrentIndex(talk_google_tgt_lang_index)

"""
GUI部分 动态生成的widget
"""
# 定时任务动态加载
data_json = []
for index, tmp in enumerate(config.get("schedule")):
Expand Down Expand Up @@ -1596,6 +1601,77 @@ def web_captions_printer_gui_create():

web_captions_printer_gui_create()

# VALL-E-X
def vall_e_x_gui_create():
    """Create the VALL-E-X settings widgets and add them to the VALL-E-X grid.

    Reads the "vall_e_x" section of ``config``, builds one description dict
    per setting, passes the list to ``self.create_widgets_from_json`` and
    places the returned widgets into ``self.ui.gridLayout_vall_e_x``.
    """
    vall_e_x_config = config.get("vall_e_x")

    def describe(label_text, label_tip, config_key, combo_options=None):
        """Return one widget description; combo fields are added only when given."""
        # Keys are inserted in the same order as the hand-written dicts this replaces.
        entry = {
            "label_text": label_text,
            "label_tip": label_tip,
        }
        if combo_options is not None:
            entry["widget_type"] = "combo_box"
            entry["combo_data_list"] = combo_options
        entry["data"] = vall_e_x_config[config_key]
        entry["main_obj_name"] = "vall_e_x"
        entry["index"] = 1
        return entry

    data_json = [
        describe("API地址", "VALL-E-X启动后监听的ip端口地址", "api_ip_port"),
        describe(
            "language",
            "VALL-E-X language",
            "language",
            ['auto-detect', 'English', '中文', '日本語', 'Mix'],
        ),
        describe(
            "accent",
            "VALL-E-X accent",
            "accent",
            ['no-accent', 'English', '中文', '日本語'],
        ),
        describe("voice preset", "VALL-E-X说话人预设名(Prompt name)", "voice_preset"),
        describe(
            "voice_preset_file_path",
            "VALL-E-X说话人预设文件路径(npz)",
            "voice_preset_file_path",
        ),
    ]

    widgets = self.create_widgets_from_json(data_json)

    # Widgets are consumed in pairs, one pair per grid row: the first goes in
    # column 0 (the label) and the second in column 1 (the input widget).
    for row, first in enumerate(range(0, len(widgets), 2)):
        self.ui.gridLayout_vall_e_x.addWidget(widgets[first], row, 0)
        self.ui.gridLayout_vall_e_x.addWidget(widgets[first + 1], row, 1)

vall_e_x_gui_create()


"""
↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
-------------------------------------------------------------------------------------------------------------
"""

# 显隐各板块
self.oncomboBox_chat_type_IndexChanged(chat_type_index)
Expand Down Expand Up @@ -1994,6 +2070,8 @@ def common_textEdit_handle(content):
config_data["audio_synthesis_type"] = "genshinvoice_top"
elif audio_synthesis_type == "bark_gui":
config_data["audio_synthesis_type"] = "bark_gui"
elif audio_synthesis_type == "VALL-E-X":
config_data["audio_synthesis_type"] = "vall_e_x"

# 音频随机变速
config_data["audio_random_speed"]["normal"]["enable"] = self.ui.checkBox_audio_random_speed_normal_enable.isChecked()
Expand Down Expand Up @@ -2101,6 +2179,10 @@ def common_textEdit_handle(content):

schedule_data = self.update_data_from_gridLayout(self.ui.gridLayout_schedule)

"""
动态读取GUI内数据到配置变量
"""

def reorganize_schedule_data(schedule_data):
tmp_json = []
keys = list(schedule_data.keys())
Expand Down Expand Up @@ -2293,6 +2375,31 @@ def reorganize_web_captions_printer_data(web_captions_printer_data):
# 写回json
config_data["web_captions_printer"] = reorganize_web_captions_printer_data(web_captions_printer_data)

# VALL-E-X
def reorganize_vall_e_x_data(vall_e_x_data):
    """Map the first five values of ``vall_e_x_data`` onto the VALL-E-X config keys.

    The mapping is positional: the value under the mapping's first key becomes
    "api_ip_port", the second "language", then "accent", "voice_preset" and
    "voice_preset_file_path". Raises IndexError when fewer than five entries
    are present.
    """
    field_names = (
        "api_ip_port",
        "language",
        "accent",
        "voice_preset",
        "voice_preset_file_path",
    )
    ordered_keys = list(vall_e_x_data)

    tmp_json = {
        field: vall_e_x_data[ordered_keys[position]]
        for position, field in enumerate(field_names)
    }

    logging.debug(f"tmp_json={tmp_json}")

    return tmp_json

vall_e_x_data = self.update_data_from_gridLayout(self.ui.gridLayout_vall_e_x)
# 写回json
config_data["vall_e_x"] = reorganize_vall_e_x_data(vall_e_x_data)

"""
↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
-------------------------------------------------------------------------------------------------------------
"""

# 获取自定义板块显隐的数据
show_box_data = self.update_data_from_gridLayout(self.ui.gridLayout_show_box, "show_box")
show_box_json = {}
Expand Down Expand Up @@ -2962,25 +3069,28 @@ def oncomboBox_chat_type_IndexChanged(self, index):


# 语音合成类型改变 加载显隐不同groupBox
# 当你新增TTS时,你需要同步修改此处的TTS配置的显隐
def oncomboBox_audio_synthesis_type_IndexChanged(self, index):
    """Show only the settings group box of the selected TTS backend.

    Args:
        index: Current index of the audio-synthesis-type combo box. Index ``i``
            selects the ``i``-th entry of ``group_boxes`` below; any other
            value hides every group box.
    """
    # One entry per combo-box item, in combo-box order. To add a TTS backend,
    # append its group box here; no separate visibility table is needed, so
    # the table width and the list of setters cannot fall out of step.
    group_boxes = (
        self.ui.groupBox_edge_tts,
        self.ui.groupBox_vits,
        self.ui.groupBox_vits_fast,
        self.ui.groupBox_elevenlabs,
        self.ui.groupBox_genshinvoice_top,
        self.ui.groupBox_bark_gui,
        self.ui.groupBox_vall_e_x,
    )

    for position, group_box in enumerate(group_boxes):
        group_box.setVisible(position == index)


# 语音识别类型改变 加载显隐不同groupBox
Expand Down
13 changes: 13 additions & 0 deletions tests/test_VALL-E-X/api5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Manual check: call endpoint fn_index=5 of a locally running VALL-E-X Gradio app."""
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")

# Inputs, in the positional order the endpoint expects.
text = "こんにちは"  # str for the 'Text' Textbox component
# One of: 'auto-detect', 'English', '中文', '日本語', 'Mix' ('language' Dropdown)
language = "auto-detect"
# One of: 'no-accent', 'English', '中文', '日本語' ('accent' Dropdown)
accent = "no-accent"
# 'Voice preset' Dropdown; listed options: 'astraea', 'cafe', 'dingzhen',
# 'esta', 'ikaros', 'MakiseKurisu', 'mikako', 'nymph', 'rosalia', 'seel',
# 'sohara', 'sukata', 'tomoki', 'tomoko', 'yaesakura', '早见沙织', '神里绫华-日语'
voice_preset = "ikaros"
# File path or URL for the 'parameter_46' File component
preset_file = "ikaros.npz"

result = client.predict(
    text,
    language,
    accent,
    voice_preset,
    preset_file,
    fn_index=5,
)

print(type(result))
print(result)
Binary file added tests/test_VALL-E-X/ikaros.npz
Binary file not shown.
16 changes: 14 additions & 2 deletions ui/main.ui
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ background-color: rgba(255, 255, 255, 50);
<property name="geometry">
<rect>
<x>0</x>
<y>-8830</y>
<y>-6514</y>
<width>984</width>
<height>9572</height>
<height>9648</height>
</rect>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
Expand Down Expand Up @@ -2032,6 +2032,18 @@ background-color: rgba(255, 255, 255, 50);
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_vall_e_x">
<property name="title">
<string>VALL-E-X</string>
</property>
<layout class="QGridLayout" name="gridLayout_70">
<item row="0" column="0">
<layout class="QGridLayout" name="gridLayout_vall_e_x"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_live2d">
<property name="title">
Expand Down
Loading

0 comments on commit 17ec290

Please sign in to comment.