Skip to content

Commit

Permalink
feat 支持自定义词表
Browse files Browse the repository at this point in the history
  • Loading branch information
jxxghp committed Jul 17, 2023
1 parent 5f61387 commit 2b9c4b0
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 57 deletions.
18 changes: 10 additions & 8 deletions app/chain/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def process(self, body: Any, form: Any, args: Any) -> None:
f"{self._current_meta.sea} 媒体库中已存在",
userid=userid))
return
# 添加订阅,状态为N
self.subscribechain.add(title=mediainfo.title,
year=mediainfo.year,
mtype=mediainfo.type,
Expand Down Expand Up @@ -182,15 +183,16 @@ def process(self, body: Any, form: Any, args: Any) -> None:
else:
# 未完成下载
logger.info(f'{self._current_media.title_year} 未下载未完整,添加订阅 ...')
# 添加订阅
# 添加订阅,状态为R
self.subscribechain.add(title=self._current_media.title,
year=self._current_media.year,
mtype=self._current_media.type,
tmdbid=self._current_media.tmdb_id,
season=self._current_meta.begin_season,
channel=channel,
userid=userid,
username=username)
username=username,
state="R")
else:
# 下载种子
context: Context = cache_list[int(text) - 1]
Expand All @@ -203,13 +205,13 @@ def process(self, body: Any, form: Any, args: Any) -> None:
if not cache_data:
# 没有缓存
self.post_message(Notification(
channel=channel, title="输入有误!", userid=userid))
channel=channel, title="输入有误!", userid=userid))
return

if self._current_page == 0:
# 第一页
self.post_message(Notification(
channel=channel, title="已经是第一页了!", userid=userid))
channel=channel, title="已经是第一页了!", userid=userid))
return
cache_type: str = cache_data.get('type')
cache_list: list = cache_data.get('items')
Expand Down Expand Up @@ -242,7 +244,7 @@ def process(self, body: Any, form: Any, args: Any) -> None:
if not cache_data:
# 没有缓存
self.post_message(Notification(
channel=channel, title="输入有误!", userid=userid))
channel=channel, title="输入有误!", userid=userid))
return
cache_type: str = cache_data.get('type')
cache_list: list = cache_data.get('items')
Expand All @@ -253,7 +255,7 @@ def process(self, body: Any, form: Any, args: Any) -> None:
if not cache_list:
# 没有数据
self.post_message(Notification(
channel=channel, title="已经是最后一页了!", userid=userid))
channel=channel, title="已经是最后一页了!", userid=userid))
return
else:
if cache_type == "Torrent":
Expand Down Expand Up @@ -282,12 +284,12 @@ def process(self, body: Any, form: Any, args: Any) -> None:
# 识别
if not meta.name:
self.post_message(Notification(
channel=channel, title="无法识别输入内容!", userid=userid))
channel=channel, title="无法识别输入内容!", userid=userid))
return
# 开始搜索
if not medias:
self.post_message(Notification(
channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid))
channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid))
return
logger.info(f"搜索到 {len(medias)} 条相关媒体信息")
# 记录当前状态
Expand Down
2 changes: 1 addition & 1 deletion app/core/meta/metaanime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import zhconv
import anitopy
from app.core.meta.metabase import MetaBase
from app.core.meta.release_groups import ReleaseGroupsMatcher
from app.core.meta.releasegroup import ReleaseGroupsMatcher
from app.utils.string import StringUtils
from app.schemas.types import MediaType

Expand Down
6 changes: 5 additions & 1 deletion app/core/meta/metabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ class MetaBase(object):
"""
# 是否处理的文件
isfile: bool = False
# 原字符串
# 原标题字符串
title: str = ""
# 识别用字符串
org_string: Optional[str] = None
# 副标题
subtitle: Optional[str] = None
Expand Down Expand Up @@ -53,6 +55,8 @@ class MetaBase(object):
video_encode: Optional[str] = None
# 音频编码
audio_encode: Optional[str] = None
# 应用的识别词信息
apply_words: Optional[List[str]] = None

# 副标题解析
_subtitle_flag = False
Expand Down
2 changes: 1 addition & 1 deletion app/core/meta/metavideo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from app.core.config import settings
from app.core.meta.metabase import MetaBase
from app.core.meta.release_groups import ReleaseGroupsMatcher
from app.core.meta.releasegroup import ReleaseGroupsMatcher
from app.utils.string import StringUtils
from app.utils.tokens import Tokens
from app.schemas.types import MediaType
Expand Down
22 changes: 9 additions & 13 deletions app/core/meta/release_groups.py → app/core/meta/releasegroup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import regex as re

from app.db.systemconfig_oper import SystemConfigOper
from app.schemas.types import SystemConfigKey
from app.utils.singleton import Singleton


Expand All @@ -8,8 +10,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
识别制作组、字幕组
"""
__release_groups: str = None
custom_release_groups: str = None
custom_separator: str = None
# 内置组
RELEASE_GROUPS: dict = {
"0ff": ['FF(?:(?:A|WE)B|CD|E(?:DU|B)|TV)'],
"1pt": [],
Expand Down Expand Up @@ -74,6 +75,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
}

def __init__(self):
self.systemconfig = SystemConfigOper()
release_groups = []
for site_groups in self.RELEASE_GROUPS.values():
for release_group in site_groups:
Expand All @@ -89,8 +91,10 @@ def match(self, title: str = None, groups: str = None):
if not title:
return ""
if not groups:
if self.custom_release_groups:
groups = f"{self.__release_groups}|{self.custom_release_groups}"
# 自定义组
custom_release_groups = self.systemconfig.get(SystemConfigKey.CustomReleaseGroups)
if custom_release_groups:
groups = f"{self.__release_groups}|{custom_release_groups}"
else:
groups = self.__release_groups
title = f"{title} "
Expand All @@ -100,12 +104,4 @@ def match(self, title: str = None, groups: str = None):
for item in re.findall(groups_re, title):
if item not in unique_groups:
unique_groups.append(item)
separator = self.custom_separator or "@"
return separator.join(unique_groups)

def update_custom(self, release_groups: str = None, separator: str = None):
"""
更新自定义制作组/字幕组,自定义分隔符
"""
self.custom_release_groups = release_groups
self.custom_separator = separator
return "@".join(unique_groups)
118 changes: 118 additions & 0 deletions app/core/meta/words.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from typing import List, Tuple

import cn2an
import regex as re

from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
from app.schemas.types import SystemConfigKey
from app.utils.singleton import Singleton


class WordsMatcher(metaclass=Singleton):
    """
    Pre-processes titles with the user-defined identifier words that are
    read from the system configuration (SystemConfigKey.CustomIdentifiers).
    """

    def __init__(self):
        self.systemconfig = SystemConfigOper()

    def prepare(self, title: str) -> Tuple[str, List[str]]:
        """
        Pre-process a title with the custom identifier words. Three rule formats are supported:
        1: blocked word
        2: replaced word => replacement
        3: front anchor <> back anchor >> offset expression (EP)
        :param title: the title to process
        :return: the processed title and the list of rules that were actually applied
        """
        applied_words = []
        # Read the custom identifier words
        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
        for word in words:
            if not word:
                continue
            try:
                if " => " in word:
                    # Replacement rule
                    strings = word.split(" => ")
                    title, message, state = self.__replace_regex(title, strings[0], strings[1])
                elif " >> " in word and " <> " in word:
                    # Episode offset rule
                    strings = word.split(" <> ")
                    offsets = strings[1].split(" >> ")
                    # The back anchor is only the part before " >> "; passing the whole
                    # "back >> offset" text would make the anchor impossible to match
                    title, message, state = self.__episode_offset(title, strings[0], offsets[0],
                                                                  offsets[1])
                else:
                    # Blocked word: remove it from the title
                    title, message, state = self.__replace_regex(title, word, "")

                if state:
                    applied_words.append(word)
                elif message:
                    # An empty message only means the rule did not match this title,
                    # which is not an error
                    logger.error(f"自定义识别词替换失败:{message}")
            except Exception as err:
                # A malformed rule must not break the processing of the remaining rules
                logger.error(f"自定义识别词替换失败:{err}")

        return title, applied_words

    @staticmethod
    def __replace_regex(title: str, replaced: str, replace: str) -> Tuple[str, str, bool]:
        """
        Regex replacement
        :param title: the title to process
        :param replaced: regular expression to look for
        :param replace: replacement text
        :return: (title, error message, whether the title matched and was replaced)
        """
        try:
            if not re.search(replaced, title):
                return title, "", False
            return re.sub(replaced, replace, title), "", True
        except Exception as err:
            # Typically an invalid user-supplied pattern; the caller logs the message
            return title, str(err), False

    @staticmethod
    def __episode_offset(title: str, front: str, back: str, offset: str) -> Tuple[str, str, bool]:
        """
        Episode number offset
        :param title: the title to process
        :param front: regular expression that must appear before the episode number
        :param back: regular expression that must appear after the episode number
        :param offset: arithmetic expression in which "EP" stands for the episode number
        :return: (title, error message, whether an offset was applied)
        """
        try:
            if back and not re.search(back, title):
                return title, "", False
            if front and not re.search(front, title):
                return title, "", False
            # The variable-length look-behind relies on the `regex` module imported as `re`
            offset_word_info_re = re.compile(r'(?<=%s.*?)[0-9一二三四五六七八九十]+(?=.*?%s)' % (front, back))
            episode_nums_str = re.findall(offset_word_info_re, title)
            if not episode_nums_str:
                return title, "", False
            episode_nums_offset_str = []
            offset_order_flag = False
            for episode_num_str in episode_nums_str:
                episode_num_int = int(cn2an.cn2an(episode_num_str, "smart"))
                offset_calculate = offset.replace("EP", str(episode_num_int))
                # NOTE(review): eval() executes an expression taken from the custom
                # identifier configuration - only safe while that configuration is trusted
                episode_num_offset_int = int(eval(offset_calculate))
                # Offset towards smaller numbers
                if episode_num_int > episode_num_offset_int:
                    offset_order_flag = True
                # Offset towards larger numbers
                elif episode_num_int < episode_num_offset_int:
                    offset_order_flag = False
                # Chinese numerals are converted back to Chinese numerals,
                # arabic numerals get their leading zeros restored
                if not episode_num_str.isdigit():
                    episode_num_offset_str = cn2an.an2cn(episode_num_offset_int, "low")
                else:
                    count_0 = re.findall(r"^0+", episode_num_str)
                    if count_0:
                        episode_num_offset_str = f"{count_0[0]}{episode_num_offset_int}"
                    else:
                        episode_num_offset_str = str(episode_num_offset_int)
                episode_nums_offset_str.append(episode_num_offset_str)
            episode_nums_dict = dict(zip(episode_nums_str, episode_nums_offset_str))
            # Shifting to smaller numbers processes ascending, shifting to larger numbers
            # processes descending (ordered by the offset strings)
            episode_nums_list = sorted(episode_nums_dict.items(),
                                       key=lambda x: x[1],
                                       reverse=not offset_order_flag)
            for episode_num in episode_nums_list:
                episode_offset_re = re.compile(
                    r'(?<=%s.*?)%s(?=.*?%s)' % (front, episode_num[0], back))
                title = re.sub(episode_offset_re, episode_num[1], title)
            return title, "", True
        except Exception as err:
            # Invalid pattern or offset expression; the caller logs the message
            return title, str(err), False
20 changes: 15 additions & 5 deletions app/core/metainfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,37 @@
import regex as re

from app.core.config import settings
from app.core.meta import MetaAnime, MetaVideo
from app.core.meta import MetaAnime, MetaVideo, MetaBase
from app.core.meta.words import WordsMatcher


def MetaInfo(title: str, subtitle: str = None) -> MetaBase:
    """
    Entry point for media recognition: pre-processes the title with the custom
    identifier words, picks the matching recognizer and returns its result
    :param title: title, torrent name or file name
    :param subtitle: subtitle or description
    :return: MetaAnime or MetaVideo
    """
    # Keep the original title, the recognizers only see the pre-processed one
    org_title = title
    # Pre-process the title with the custom identifier words
    title, apply_words = WordsMatcher().prepare(title)
    # A title that ends with a known media extension is treated as a file
    isfile = bool(title) and Path(title).suffix.lower() in settings.RMT_MEDIAEXT
    # Recognize
    meta = MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile)
    # Record the original title
    meta.title = org_title
    # Record the identifier words that were applied
    meta.apply_words = apply_words or []

    return meta


def is_anime(name: str):
def is_anime(name: str) -> bool:
"""
判断是否为动漫
:param name: 名称
Expand Down
28 changes: 0 additions & 28 deletions app/modules/words/__init__.py

This file was deleted.

4 changes: 4 additions & 0 deletions app/schemas/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ class SystemConfigKey(Enum):
TorrentsPriority = "TorrentsPriority"
# 通知消息渠道设置
NotificationChannels = "NotificationChannels"
# 自定义制作组/字幕组
CustomReleaseGroups = "CustomReleaseGroups"
# 自定义识别词
CustomIdentifiers = "CustomIdentifiers"


# 站点框架
Expand Down

0 comments on commit 2b9c4b0

Please sign in to comment.