Skip to content

Commit

Permalink
Merge pull request Ikaros-521#848 from Ikaros-521/owner
Browse files Browse the repository at this point in the history
过滤 新增 限定时间段内的数据去重功能(针对数据有 弹幕、入场、礼物),时间段内的重复弹幕等会被丢弃
  • Loading branch information
Ikaros-521 authored May 19, 2024
2 parents 22dfb7c + 5db7c8b commit 1ea6a84
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 0 deletions.
6 changes: 6 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@
"idle_time_task_forget_reserve_num": 1,
"image_recognition_schedule_forget_duration": 0.1,
"image_recognition_schedule_forget_reserve_num": 1,
"limited_time_deduplication": {
"enable": false,
"comment": 10,
"gift": 10,
"entrance": 60
},
"message_queue_max_len": 50,
"voice_tmp_path_queue_max_len": 100,
"priority_mapping": {
Expand Down
6 changes: 6 additions & 0 deletions config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@
"idle_time_task_forget_reserve_num": 1,
"image_recognition_schedule_forget_duration": 0.1,
"image_recognition_schedule_forget_reserve_num": 1,
"limited_time_deduplication": {
"enable": false,
"comment": 10,
"gift": 10,
"entrance": 60
},
"message_queue_max_len": 50,
"voice_tmp_path_queue_max_len": 100,
"priority_mapping": {
Expand Down
85 changes: 85 additions & 0 deletions utils/my_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class My_handle(metaclass=SingletonMeta):
# 是否在数据处理中
is_handleing = 0

# 异常报警数据
abnormal_alarm_data = {
"platform": {
"error_count": 0
Expand All @@ -66,6 +67,13 @@ class My_handle(metaclass=SingletonMeta):
}
}

# 直播消息存储(入场、礼物、弹幕),用于限定时间内的去重
live_data = {
"comment": [],
"gift": [],
"entrance": [],
}

# 答谢板块文案数据临时存储
thanks_entrance_copy = []
thanks_gift_copy = []
Expand Down Expand Up @@ -145,9 +153,34 @@ def __init__(self, config_path):
self.config_load()

logging.info(f"配置数据加载成功。")

# 启动定时器
self.start_timers()
except Exception as e:
logging.error(traceback.format_exc())

# 清空live_data直播数据
def clear_live_data(self, type: str=""):
    """Reset one category of cached live messages to an empty list.

    Args:
        type (str): Key of ``My_handle.live_data`` to reset. The class
            declares the keys "comment", "gift" and "entrance". When the
            value is ``None`` or the empty string nothing is changed.
    """
    # Guard clause: no category given, nothing to clear.
    if type is None or type == "":
        return

    # Rebind the entry to a fresh list rather than clearing it in place.
    My_handle.live_data[type] = []


# 启动定时器
def start_timers(self):
    """Start the periodic timers that empty the limited-time de-duplication caches.

    Does nothing unless ``filter.limited_time_deduplication.enable`` is
    truthy in the configuration. Otherwise one timer is started for each
    message category ("comment", "gift", "entrance"); its interval in
    seconds is read from the configuration key of the same name. The
    currently pending timer of each category is stored on
    ``self.comment_check_timer``, ``self.gift_check_timer`` and
    ``self.entrance_check_timer``.

    ``threading.Timer`` runs its callback only once, so every callback
    re-arms a new timer after clearing its cache. Without that the caches
    would be emptied a single time and every later repeat would be
    dropped forever.
    """
    if not My_handle.config.get("filter", "limited_time_deduplication", "enable"):
        return

    def schedule(data_type, interval):
        """Arm a one-shot timer that clears `data_type` and then re-arms itself."""
        def clear_and_rearm():
            try:
                self.clear_live_data(data_type)
            finally:
                # Keep the cycle alive even if clearing fails unexpectedly.
                schedule(data_type, interval)

        timer = threading.Timer(interval, clear_and_rearm)
        # Daemon thread: an endlessly re-arming timer must not keep the
        # interpreter alive at shutdown.
        timer.daemon = True
        setattr(self, f"{data_type}_check_timer", timer)
        timer.start()

    for data_type in ("comment", "gift", "entrance"):
        interval = int(My_handle.config.get("filter", "limited_time_deduplication", data_type))
        if interval <= 0:
            # A non-positive interval would make the timer re-arm in a tight loop.
            logging.warning(f"限定时间去重的检测周期必须大于0,已跳过 {data_type} 定时器(当前值:{interval})")
            continue
        schedule(data_type, interval)

    logging.info("启动限定时间直播数据去重定时器")

# 是否位于数据处理状态
def is_handle_empty(self):
Expand Down Expand Up @@ -2045,6 +2078,46 @@ def blacklist_handle(self, data):
logging.error(traceback.format_exc())
return False



# 判断限定时间段内数据是否重复
def is_data_repeat_in_limited_time(self, type: str=None, data: dict=None):
    """Report whether `data` repeats a message already cached for its category.

    The check only runs when ``filter.limited_time_deduplication.enable``
    is truthy in the configuration. A comment counts as a repeat when both
    its "username" and "content" equal those of a cached comment; a gift or
    an entrance counts as a repeat when its "username" equals that of a
    cached entry. A message that is not a repeat is appended to
    ``My_handle.live_data[type]`` so later calls can match against it.

    Args:
        type (str): Message category: "comment", "gift" or "entrance".
        data (dict): Incoming message. Must contain "username", and
            "content" when `type` is "comment".

    Returns:
        bool: True when a matching message is already cached (the caller
        should drop `data`). False otherwise, including when the feature is
        disabled, an argument is missing, or `type` is not a known category.
    """
    if not My_handle.config.get("filter", "limited_time_deduplication", "enable"):
        return False

    logging.debug(f"限定时间段内数据重复 My_handle.live_data={My_handle.live_data}")

    if type is None or type == "" or data is None:
        return False

    # Fields that must all be equal for two messages of a category to be
    # considered the same.
    match_fields = {
        "comment": ("username", "content"),
        "gift": ("username",),
        "entrance": ("username",),
    }
    fields = match_fields.get(type)
    if fields is None:
        # Unknown category: nothing to compare against and nothing to store.
        return False

    is_repeat = any(
        all(cached[field] == data[field] for field in fields)
        for cached in My_handle.live_data[type]
    )
    if is_repeat:
        logging.debug(f"限定时间段内数据重复 type={type},data={data}")
        return True

    # Not seen yet: remember it. The list is looked up again here because
    # the cache entry may have been rebound to a fresh list in the meantime.
    My_handle.live_data[type].append(data)
    return False

"""
,`
Expand Down Expand Up @@ -2084,6 +2157,10 @@ def comment_handle(self, data):
# 输出当前用户发送的弹幕消息
logging.debug(f"[{username}]: {content}")

# 限定时间数据去重
if self.is_data_repeat_in_limited_time("comment", data):
return None

# 黑名单过滤
if self.blacklist_handle(data):
return None
Expand Down Expand Up @@ -2351,6 +2428,10 @@ def comment_handle(self, data):
# 礼物处理
def gift_handle(self, data):
try:
# 限定时间数据去重
if self.is_data_repeat_in_limited_time("gift", data):
return None

# 记录数据库
if My_handle.config.get("database", "gift_enable"):
insert_data_sql = '''
Expand Down Expand Up @@ -2447,6 +2528,10 @@ def gift_handle(self, data):
# 入场处理
def entrance_handle(self, data):
try:
# 限定时间数据去重
if self.is_data_repeat_in_limited_time("entrance", data):
return None

# 记录数据库
if My_handle.config.get("database", "entrance_enable"):
insert_data_sql = '''
Expand Down
13 changes: 13 additions & 0 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,6 +1442,11 @@ def common_textarea_handle(content):
config_data["filter"]["image_recognition_schedule_forget_duration"] = round(float(input_filter_image_recognition_schedule_forget_duration.value), 2)
config_data["filter"]["image_recognition_schedule_forget_reserve_num"] = int(input_filter_image_recognition_schedule_forget_reserve_num.value)

config_data["filter"]["limited_time_deduplication"]["enable"] = switch_filter_limited_time_deduplication_enable.value
config_data["filter"]["limited_time_deduplication"]["comment"] = int(input_filter_limited_time_deduplication_comment.value)
config_data["filter"]["limited_time_deduplication"]["gift"] = int(input_filter_limited_time_deduplication_gift.value)
config_data["filter"]["limited_time_deduplication"]["entrance"] = int(input_filter_limited_time_deduplication_entrance.value)

# 优先级
config_data["filter"]["message_queue_max_len"] = int(input_filter_message_queue_max_len.value)
config_data["filter"]["voice_tmp_path_queue_max_len"] = int(input_filter_voice_tmp_path_queue_max_len.value)
Expand Down Expand Up @@ -2877,6 +2882,7 @@ def save_config():
input_filter_badwords_path = ui.input(label='违禁词路径', value=config.get("filter", "badwords", "path"), placeholder='本地违禁词数据路径(你如果不需要,可以清空文件内容)').style("width:200px;")
input_filter_badwords_bad_pinyin_path = ui.input(label='违禁拼音路径', value=config.get("filter", "badwords", "bad_pinyin_path"), placeholder='本地违禁拼音数据路径(你如果不需要,可以清空文件内容)').style("width:200px;")
input_filter_badwords_replace = ui.input(label='违禁词替换', value=config.get("filter", "badwords", "replace"), placeholder='在不丢弃违禁语句的前提下,将违禁词替换成此项的文本').style("width:200px;")

with ui.expansion('消息遗忘&保留设置', icon="settings", value=True).classes('w-full'):
with ui.element('div').classes('p-2 bg-blue-100'):
ui.label("遗忘间隔 指的是每隔这个间隔时间(秒),就会丢弃这个间隔时间中接收到的数据,但会保留最新的n个数据;保留数 指的是保留最新收到的数据的数量")
Expand All @@ -2900,6 +2906,13 @@ def save_config():
input_filter_idle_time_task_forget_reserve_num = ui.input(label='闲时任务保留数', placeholder='保留最新收到的数据的数量', value=config.get("filter", "idle_time_task_forget_reserve_num")).style("width:200px;")
input_filter_image_recognition_schedule_forget_duration = ui.input(label='图像识别遗忘间隔', placeholder='指的是每隔这个间隔时间(秒),就会丢弃这个间隔时间中接收到的数据,\n保留数据在以下配置中可以自定义', value=config.get("filter", "image_recognition_schedule_forget_duration")).style("width:200px;")
input_filter_image_recognition_schedule_forget_reserve_num = ui.input(label='图像识别保留数', placeholder='保留最新收到的数据的数量', value=config.get("filter", "image_recognition_schedule_forget_reserve_num")).style("width:200px;")
with ui.expansion('限定时间段内数据重复丢弃', icon="settings", value=True).classes('w-full'):
with ui.row():
switch_filter_limited_time_deduplication_enable = ui.switch('启用', value=config.get("filter", "limited_time_deduplication", "enable")).style(switch_internal_css)
input_filter_limited_time_deduplication_comment = ui.input(label='弹幕检测周期', value=config.get("filter", "limited_time_deduplication", "comment"), placeholder='在这个周期时间(秒)内,重复的数据将被丢弃').style("width:200px;")
input_filter_limited_time_deduplication_gift = ui.input(label='礼物检测周期', value=config.get("filter", "limited_time_deduplication", "gift"), placeholder='在这个周期时间(秒)内,重复的数据将被丢弃').style("width:200px;")
input_filter_limited_time_deduplication_entrance = ui.input(label='入场检测周期', value=config.get("filter", "limited_time_deduplication", "entrance"), placeholder='在这个周期时间(秒)内,重复的数据将被丢弃').style("width:200px;")

with ui.expansion('待合成音频的消息&待播放音频队列', icon="settings", value=True).classes('w-full'):
with ui.row():
input_filter_message_queue_max_len = ui.input(label='消息队列最大保留长度', placeholder='收到的消息,生成的文本内容,会根据优先级存入消息队列,当新消息的优先级低于队列中所有的消息且超过此长度时,此消息将被丢弃', value=config.get("filter", "message_queue_max_len")).style("width:160px;")
Expand Down

0 comments on commit 1ea6a84

Please sign in to comment.