Skip to content

Commit

Permalink
faster_whisper新增语言设定;修复录音解析失败无法重复录音bug;新增音频信息回调开关
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ikaros-521 committed May 23, 2024
1 parent ef4baa4 commit b01e99d
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 23 deletions.
4 changes: 3 additions & 1 deletion config.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,7 +40,8 @@
"normal_interval_min": 0.3,
"normal_interval_max": 0.5,
"out_path": "out",
"player": "pygame"
"player": "pygame",
"info_to_callback": true
},
"audio_player": {
"api_ip_port": "http://127.0.0.1:5600"
Expand Down Expand Up @@ -909,6 +910,7 @@
},
"faster_whisper": {
"model_size": "large-v3",
"language": "自动识别",
"device": "cuda",
"compute_type": "float16",
"download_root": "./models",
Expand Down
4 changes: 3 additions & 1 deletion config.json.bak
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,7 +40,8 @@
"normal_interval_min": 0.3,
"normal_interval_max": 0.5,
"out_path": "out",
"player": "pygame"
"player": "pygame",
"info_to_callback": true
},
"audio_player": {
"api_ip_port": "http://127.0.0.1:5600"
Expand Down Expand Up @@ -909,6 +910,7 @@
},
"faster_whisper": {
"model_size": "large-v3",
"language": "自动识别",
"device": "cuda",
"compute_type": "float16",
"download_root": "./models",
Expand Down
8 changes: 7 additions & 1 deletion main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -475,7 +475,11 @@ def do_listen_and_comment(status=True):

logging.debug("faster_whisper模型加载中...")

segments, info = faster_whisper_model.transcribe(WAVE_OUTPUT_FILENAME, beam_size=config.get("talk", "faster_whisper", "beam_size"))
language = config.get("talk", "faster_whisper", "language")
if language == "自动识别":
language = None

segments, info = faster_whisper_model.transcribe(WAVE_OUTPUT_FILENAME, language=language, beam_size=config.get("talk", "faster_whisper", "beam_size"))

logging.debug("识别语言为:'%s',概率:%f" % (info.language, info.language_probability))

Expand All @@ -485,6 +489,8 @@ def do_listen_and_comment(status=True):
content += segment.text + "。"

if content == "":
# 恢复录音标志位
is_recording = False
return

# 输出识别结果
Expand Down
31 changes: 19 additions & 12 deletions utils/audio.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1032,22 +1032,29 @@ def send_audio_play_info_to_callback(self, data: dict=None):
Args:
data (dict): 音频播放信息
"""
if data is None:
data = {
"type": "audio_playback_completed",
"data": {
# 待播放音频数量
"wait_play_audio_num": len(Audio.voice_tmp_path_queue),
# 待合成音频的消息数量
"wait_synthesis_msg_num": len(Audio.message_queue),
try:
if False == self.config.get("play_audio", "info_to_callback"):
return None

if data is None:
data = {
"type": "audio_playback_completed",
"data": {
# 待播放音频数量
"wait_play_audio_num": len(Audio.voice_tmp_path_queue),
# 待合成音频的消息数量
"wait_synthesis_msg_num": len(Audio.message_queue),
}
}
}

logging.debug(f"data={data}")
logging.debug(f"data={data}")

resp = self.common.send_request(f'http://{self.config.get("api_ip")}:{self.config.get("api_port")}/callback', "POST", data)
resp = self.common.send_request(f'http://{self.config.get("api_ip")}:{self.config.get("api_port")}/callback', "POST", data)

return resp
return resp
except Exception as e:
logging.error(traceback.format_exc())
return None


# 播放音频
Expand Down
14 changes: 7 additions & 7 deletions utils/my_handle.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1257,9 +1257,9 @@ def sd_handle(self, data):
return False


# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
def comment_check_and_replace(self, content):
"""弹幕格式检查和特殊字符替换
"""弹幕格式检查和特殊字符替换和指定语言过滤
Args:
content (str): 待处理的弹幕内容
Expand Down Expand Up @@ -1389,7 +1389,7 @@ def reread_handle(self, data, filter=False, type="reread"):
if content is None:
return

# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
content = self.comment_check_and_replace(content)
if content is None:
return
Expand Down Expand Up @@ -2452,7 +2452,7 @@ def comment_handle(self, data):
if content is None:
return

# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
content = self.comment_check_and_replace(content)
if content is None:
return
Expand Down Expand Up @@ -2934,7 +2934,7 @@ def idle_time_task_handle(self, data):
# 输出当前用户发送的弹幕消息
logging.info(f"[{username}]: {content}")

# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
content = self.comment_check_and_replace(content)
if content is None:
return None
Expand Down Expand Up @@ -2976,7 +2976,7 @@ def idle_time_task_handle(self, data):
# 输出当前用户发送的弹幕消息
logging.info(f"[{username}]: {content}")

# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
content = self.comment_check_and_replace(content)
if content is None:
return None
Expand Down Expand Up @@ -3287,7 +3287,7 @@ def talk_handle(self, data):
if content is None:
return

# 弹幕格式检查和特殊字符替换
# 弹幕格式检查和特殊字符替换和指定语言过滤
content = self.comment_check_and_replace(content)
if content is None:
return
Expand Down
39 changes: 38 additions & 1 deletion webui.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1304,6 +1304,34 @@ def check_config():

return True

"""
.................................................................................................................................................................
.................................................................................................................................................................
.................................................................................................................................................................
.................................................................................................................................................................
.............................................................................................................:**.................................................
........+++..........-++:....:++:...*##############:%%%%%%%%%#.....%%%%%%%%%%%%%%%%%%%%%%%.....%@#...........-@%..........+%%%%%%%%%%%%%+-----------:............
........%@#..........=@@=....-@@=....::::%#:=@+::::.........%%.....%%.....%%.....%#.....%%......+@@*..#%%%%%%%@@%%%%%%%%....=@#.....%@-.*%@#######%@=............
........%@#..........=@@=....-@@=........%*.-@+.............%%.....%@%%%%%@@%%%%%@@%%%%%@%........%%:........-@%............=@#.....%@-..#@-......#@-............
........%@#..........=@@=....-@@=....%%%%@@%%@%%%%=.........%%.....::........#%=........::...................=@%:...........=@%#####%@-..=@=.....-%%.............
........%@#..........=@@=....-@@=....%%..%*.-@+.=@=.........%%...%%%%%%%%%%%%@@%%%%%%%%%%%%*.:-----..#%%%%%%%%%%%%%%%%%@-...=@#-----%@-..:%#.....*@=.............
........%@#..........=@@=....-@@=....%%.:%*.-@+.=@=.-%@@@@@@@%...............%@=.............+##%@%.....=%%+:..=@#....#%:...=@#.....%@-...#@-....%%..............
........%@#..........=@@=....-@@=....%%.+@=.-@+.=@=.=@+.....##.......@%***************#@+.......=@%....-..:*%#.=@#....+*....=@#-----%@-...-%#...#@=..............
........%@#..........=@@=....-@@=....%%+@#...*%%%@=.=@+..............@#===============*@+.......=@%...-#@%*:...+@*..........=@%*****%@-....*@=.=%*...............
........#@%..........+@@:....-@@=....%%-*.......=@=.=@+..............@#-::::::::::::::+@+.......=@%......:**...*@+..........=@#.....%@-.....%@#%%................
........*@@=........:%@#.....-@@=....%@%%%%%%%%%%@=.=@+......-*:.....@%%%%%%%%%%%%%%%%%@+.......=@%.:%@@@@@@@@@@@@@@@@@@%...=@#.....%@++*=...%@#.................
.........*@@%-.....*%@%......-@@=....%%.........=@=.-@+......+@=.....@*...............=@+.......=@%..:........%@*.........+#%@%%%@@@@@#+-..:%@%@%................
..........:%%@@@@@@%%-.......-@@=....%%.........=@=.-@+......%@-.....@%%%%%%%%%%%%%%%%%@+.......=@%#@%:....:#@%*%@%*......+*=:......%@-...#@%..:%@*..............
.....................................%@@@@@@@@@@@@=.:%@#+==+%@%.....:@#...............=@*.......#@@#-..:+%@@%-....=#@@#-............%@--%@%-.....+%@%=...........
.....................................%%.........=%=...=*****+:..-***************************-...-+...#@%#+:..........-#%:...........%@=%#:.........+#............
.................................................................................................................................................................
.................................................................................................................................................................
.................................................................................................................................................................
.................................................................................................................................................................
"""

# 读取webui配置到dict变量
def webui_config_to_dict(config_data):
"""读取webui配置到dict变量
Expand Down Expand Up @@ -2404,6 +2432,7 @@ def common_textarea_handle(content):
config_data["talk"]["baidu"]["api_key"] = input_talk_baidu_api_key.value
config_data["talk"]["baidu"]["secret_key"] = input_talk_baidu_secret_key.value
config_data["talk"]["faster_whisper"]["model_size"] = input_faster_whisper_model_size.value
config_data["talk"]["faster_whisper"]["language"] = select_faster_whisper_language.value
config_data["talk"]["faster_whisper"]["device"] = select_faster_whisper_device.value
config_data["talk"]["faster_whisper"]["compute_type"] = select_faster_whisper_compute_type.value
config_data["talk"]["faster_whisper"]["download_root"] = input_faster_whisper_download_root.value
Expand Down Expand Up @@ -3245,7 +3274,7 @@ def save_config():
with ui.card().style(card_css):
ui.label('web字幕打印机')
with ui.grid(columns=2):
switch_web_captions_printer_enable = ui.switch('启用', value=config.get("web_captions_printer", "enable")).style(switch_internal_css)
switch_web_captions_printer_enable = ui.switch('启用', value=config.get("web_captions_printer", "enable")).style(switch_internal_css).tooltip("如果您使用了audio player来做音频播放,并开启了其web字幕打印机功能,\n那请勿启动此功能,因为这样就重复惹")
input_web_captions_printer_api_ip_port = ui.input(
label='API地址',
value=config.get("web_captions_printer", "api_ip_port"),
Expand Down Expand Up @@ -5250,6 +5279,14 @@ async def fish_speech_load_model(data):
with ui.row():
input_faster_whisper_model_size = ui.input(label='model_size', value=config.get("talk", "faster_whisper", "model_size"), placeholder='Size of the model to use')
data_json = {}
for line in ["自动识别", 'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'zh', 'yue']:
data_json[line] = line
select_faster_whisper_language = ui.select(
label='识别语言',
options=data_json,
value=config.get("talk", "faster_whisper", "language")
).style("width:200px")
data_json = {}
for line in ["cuda", "cpu", "auto"]:
data_json[line] = line
select_faster_whisper_device = ui.select(
Expand Down

0 comments on commit b01e99d

Please sign in to comment.