接入YouTube

zhubairui · Oct 20, 2023 · 02023d7 · 02023d7
1 parent 1c5d9f0
commit 02023d7
Show file tree

Hide file tree

Showing 9 changed files with 309 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@ Luna AI 是一个由
 
 `ChatterBot / GPT / Claude / langchain 本地 or 云端 / chatglm / text-generation-webui / 讯飞星火 / 智谱AI / 谷歌Bard / 文心一言 / 通义千问`
 
-做为 `大脑` 驱动的虚拟主播 `Live2D / Vtube Studio / UE5 + Audio2Face / xuniren` ，可以在 `Bilibili / 抖音 / 快手 / 斗鱼` 直播中与观众实时互动 或者 直接在本地和您进行聊天
+作为 `大脑` 驱动的虚拟主播 `Live2D / Vtube Studio / UE5 + Audio2Face / xuniren` ，可以在 `Bilibili / 抖音 / 快手 / 斗鱼 / YouTube` 直播中与观众实时互动 或者 直接在本地和您进行聊天
 
 它使用自然语言处理和文本转语音技术 `Edge-TTS / VITS-Fast / elevenlabs / bark-gui / VALL-E-X` 生成对观众问题的回答并可以通过 `so-vits-svc / DDSP-SVC` 变声
 

diff --git a/data/本地问答库.json b/data/本地问答库.json
@@ -40,6 +40,7 @@
     {
         "关键词": [
             "你的主人是谁",
+            "你主人叫什么",
             "你主人是谁"
         ],
         "回答": [

diff --git a/docs/AI Vtuber.xmind b/docs/AI Vtuber.xmind
diff --git a/docs/xmind.png b/docs/xmind.png
diff --git a/main.py b/main.py
@@ -687,7 +687,7 @@ def init_config(self):
             """
             # 修改下拉框内容
             self.ui.comboBox_platform.clear()
-            self.ui.comboBox_platform.addItems(["聊天模式", "哔哩哔哩", "抖音", "快手", "斗鱼"])
+            self.ui.comboBox_platform.addItems(["聊天模式", "哔哩哔哩", "抖音", "快手", "斗鱼", "YouTube"])
             platform_index = 0
             if self.platform == "talk":
                 platform_index = 0
@@ -699,6 +699,8 @@ def init_config(self):
                 platform_index = 3
             elif self.platform == "douyu":
                 platform_index = 4
+            elif self.platform == "youtube":
+                platform_index = 5
             self.ui.comboBox_platform.setCurrentIndex(platform_index)
 
             # 修改输入框内容
@@ -2697,6 +2699,8 @@ def common_textEdit_handle(content):
                 config_data["platform"] = "ks"
             elif platform == "斗鱼":
                 config_data["platform"] = "douyu"
+            elif platform == "YouTube":
+                config_data["platform"] = "youtube"
 
             # 获取单行文本输入框的内容
             room_display_id = self.ui.lineEdit_room_display_id.text()

diff --git a/requirements_youtube.txt b/requirements_youtube.txt
@@ -0,0 +1,38 @@
+openai
+pygame
+edge-tts==6.1.6
+langid
+aiohttp
+asyncio
+slack_sdk==3.21.3
+profanity==1.1
+elevenlabs
+PyPDF2
+protobuf
+protobuf_inspector
+requests
+websocket-client
+langchain==0.0.142
+PyQt5
+pyqt5-tools
+tiktoken
+pyahocorasick
+webuiapi
+pyvirtualcam
+numpy
+faiss-cpu
+sentence_transformers
+pydub
+send2trash
+pypinyin
+sparkdesk-api
+gradio_client
+schedule
+pyhandytools
+curl_cffi
+zhipuai
+bardapi
+revTongYi
+pyautogui
+nicegui
+pytchat
diff --git a/tests/test_youtube/test_pytchat.py b/tests/test_youtube/test_pytchat.py
@@ -0,0 +1,24 @@
+import pytchat
+import time, re
+
+# Manual smoke test for pytchat:
+# prints the chat messages of one YouTube video to stdout.
+# Stream under test: https://www.youtube.com/watch?v=P5wlxQgYhMY
+video_id = "P5wlxQgYhMY"
+
+live = pytchat.create(video_id=video_id)
+# Keep polling for as long as pytchat reports the chat as alive
+while live.is_alive():
+    try:
+        for item in live.get().sync_items():
+            # Strip ':name:' emoji shortcodes, then every '#'
+            text = re.sub(r':[^\s]+:', '', item.message).replace('#', '')
+            if text == '':
+                continue
+
+            # Printed as "[author]: message" so the author's name stays
+            # attached to what they wrote
+            print("".join(['[', item.author.name, ']: ', text]))
+    except Exception as err:
+        # Report the failure and keep polling
+        print("Error receiving chat: {0}".format(err))
diff --git a/webui.py b/webui.py
@@ -631,7 +631,7 @@ def textarea_data_change(data):
 with ui.tab_panels(tabs, value=common_config_page).classes('w-full'):
     with ui.tab_panel(common_config_page):
         with ui.column():
-            select_platform = ui.select(label='平台', options={'talk': '聊天模式', 'bilibili': '哔哩哔哩', 'dy': '抖音', 'ks': '快手', 'douyu': '斗鱼'}, value=config.get("platform")).style("width:200px;")
+            select_platform = ui.select(label='平台', options={'talk': '聊天模式', 'bilibili': '哔哩哔哩', 'dy': '抖音', 'ks': '快手', 'douyu': '斗鱼', 'youtube': 'YouTube'}, value=config.get("platform")).style("width:200px;")
 
             input_room_display_id = ui.input(label='直播间号', placeholder='一般为直播间URL最后/后面的字母或数字', value=config.get("room_display_id")).style("width:200px;")
 

diff --git a/youtube.py b/youtube.py
@@ -0,0 +1,239 @@
+import logging, os
+import threading
+import schedule
+import random
+import asyncio
+import traceback
+import re
+
+from functools import partial
+
+import pytchat
+
+from utils.common import Common
+from utils.config import Config
+from utils.logger import Configure_logger
+from utils.my_handle import My_handle
+
+# ASCII-art banner; a raw string so its backslashes are not parsed as escapes
+r"""
+	___ _                       
+	|_ _| | ____ _ _ __ ___  ___ 
+	 | || |/ / _` | '__/ _ \/ __|
+	 | ||   < (_| | | | (_) \__ \
+	|___|_|\_\__,_|_|  \___/|___/
+"""
+
+config = None  # Config instance, assigned by start_server()
+common = None  # Common helper instance, assigned by start_server()
+my_handle = None  # My_handle instance, assigned by start_server()
+# Recent user names; start_server() resets it to [""]
+last_username_list = None
+
+# 点火起飞
+def start_server():
+    """Run the YouTube live-chat bridge until the stream ends or is interrupted.
+
+    Loads ``config.json``, configures logging, creates the shared ``My_handle``
+    instance, starts two background threads (scheduled copy and trends
+    copywriting) and then forwards every non-empty chat message of the video
+    named by the ``room_display_id`` config entry to ``my_handle.process_data``.
+
+    Never returns to the caller: the ``finally`` clause at the end always
+    terminates the whole process with ``os._exit(0)``.
+    """
+    global config, common, my_handle, last_username_list
+
+    config_path = "config.json"
+
+    common = Common()
+    config = Config(config_path)
+    # Log file path
+    log_path = "./log/log-" + common.get_bj_time(1) + ".txt"
+    Configure_logger(log_path)
+
+    # Only let WARNING and above through from the httpx logger
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+
+    # Most recent user names, newest last
+    last_username_list = [""]
+
+    my_handle = My_handle(config_path)
+    if my_handle is None:
+        logging.error("程序初始化失败！")
+        os._exit(0)
+
+    def add_username_to_last_username_list(data):
+        """Append ``data`` to the recent-user list and keep only the last 3 entries."""
+        global last_username_list
+
+        last_username_list.append(data)
+        last_username_list = last_username_list[-3:]
+
+    def schedule_task(index):
+        """Send one randomly chosen copy of schedule entry ``index`` for processing.
+
+        The values for ``time``, ``user_num`` and ``last_username`` are passed to
+        ``str.format`` when their names occur in the chosen copy.
+        """
+        logging.debug("定时任务执行中...")
+        hour, minute = common.get_bj_time(6)
+
+        if 0 <= hour < 6:
+            time_text = f"凌晨{hour}点{minute}分"
+        elif 6 <= hour < 9:
+            time_text = f"早晨{hour}点{minute}分"
+        elif 9 <= hour < 12:
+            time_text = f"上午{hour}点{minute}分"
+        elif hour == 12:
+            time_text = f"中午{hour}点{minute}分"
+        elif 13 <= hour < 18:
+            time_text = f"下午{hour - 12}点{minute}分"
+        elif 18 <= hour < 20:
+            time_text = f"傍晚{hour - 12}点{minute}分"
+        elif 20 <= hour < 24:
+            time_text = f"晚上{hour - 12}点{minute}分"
+        else:
+            # Unexpected hour value: previously this left the variable unbound
+            time_text = f"{hour}点{minute}分"
+
+        # Pick a random copy from the schedule entry at this index
+        random_copy = random.choice(config.get("schedule")[index]["copy"])
+
+        # Dynamic variables that a copy may reference
+        variables = {
+            'time': time_text,
+            'user_num': "N",
+            'last_username': last_username_list[-1],
+        }
+
+        # Format only when a variable name occurs in the copy; else use it verbatim
+        used = {var: value for var, value in variables.items() if var in random_copy}
+        content = random_copy.format(**used) if used else random_copy
+
+        data = {
+            "platform": "YouTube",
+            "username": None,
+            "content": content
+        }
+
+        logging.info(f"定时任务：{content}")
+
+        my_handle.process_data(data, "schedule")
+
+    def run_schedule():
+        """Register every enabled schedule entry, then run pending jobs forever."""
+        # Function-scope import: this module has no file-level ``import time``
+        import time
+
+        try:
+            for index, task in enumerate(config.get("schedule")):
+                if task["enable"]:
+                    schedule.every(task["time"]).seconds.do(partial(schedule_task, index))
+        except Exception:
+            logging.error(traceback.format_exc())
+
+        while True:
+            schedule.run_pending()
+            # Pause between polls; without it this loop spins a CPU core
+            time.sleep(1)
+
+    # Create and start the scheduled-task thread
+    schedule_thread = threading.Thread(target=run_schedule)
+    schedule_thread.start()
+
+    async def run_trends_copywriting():
+        """Repeatedly reread the files of every configured copywriting folder."""
+        try:
+            if not config.get("trends_copywriting", "enable"):
+                return
+
+            logging.info("动态文案任务线程运行中...")
+
+            while True:
+                # Paths of the copy files collected during this pass
+                copywriting_file_path_list = []
+                # Whether this pass handed anything to reread_handle
+                played_any = False
+
+                for copywriting in config.get("trends_copywriting", "copywriting"):
+                    # NOTE(review): the list is not cleared per entry, so files of earlier
+                    # entries are replayed under later entries' settings - confirm intent.
+                    copywriting_file_path_list.extend(common.get_all_file_paths(copywriting["folder_path"]))
+
+                    # Optional random play order
+                    if config.get("trends_copywriting", "random_play"):
+                        random.shuffle(copywriting_file_path_list)
+
+                    for copywriting_file_path in copywriting_file_path_list:
+                        copywriting_file_content = common.read_file_return_content(copywriting_file_path)
+                        if copywriting["prompt_change_enable"]:
+                            # Let the LLM rewrite the copy using the configured prompt
+                            data_json = {
+                                "user_name": "trends_copywriting",
+                                "content": copywriting["prompt_change_content"] + copywriting_file_content
+                            }
+                            data_json["content"] = my_handle.llm_handle(config.get("chat_type"), data_json)
+                        else:
+                            data_json = {
+                                "user_name": "trends_copywriting",
+                                "content": copywriting_file_content
+                            }
+
+                        # Skip empty results
+                        if data_json["content"] is not None and data_json["content"] != "":
+                            my_handle.reread_handle(data_json)
+                            played_any = True
+                            await asyncio.sleep(config.get("trends_copywriting", "play_interval"))
+
+                if not played_any:
+                    # Idle pass: wait before rescanning so the loop cannot busy-spin
+                    await asyncio.sleep(1)
+        except Exception:
+            logging.error(traceback.format_exc())
+
+    # Create and start the trends-copywriting thread (it runs its own event loop)
+    threading.Thread(target=lambda: asyncio.run(run_trends_copywriting())).start()
+
+    # Matches ``:name:`` emoji shortcodes; compiled once instead of per message
+    emoji_pattern = re.compile(r':[^\s]+:')
+
+    try:
+        try:
+            video_id = config.get("room_display_id")
+        except Exception as e:
+            logging.error("获取直播间号失败！\n{0}".format(e))
+            # No video id to connect to; the ``finally`` below ends the process
+            return
+
+        live = pytchat.create(video_id=video_id)
+        while live.is_alive():
+            try:
+                for c in live.get().sync_items():
+                    # Strip emoji shortcodes and '#' characters from the message
+                    content = emoji_pattern.sub('', c.message).replace('#', '')
+                    if content == '':
+                        continue
+
+                    user_name = c.author.name
+                    logging.info(f"[{user_name}]: {content}")
+                    data = {
+                        "platform": "YouTube",
+                        "username": user_name,
+                        "content": content
+                    }
+                    my_handle.process_data(data, "comment")
+            except Exception as e:
+                logging.error("Error receiving chat: {0}".format(e))
+    except KeyboardInterrupt:
+        logging.warning('程序被强行退出')
+    except Exception:
+        # Log unexpected failures; otherwise os._exit below would hide them
+        logging.error(traceback.format_exc())
+    finally:
+        logging.warning('关闭连接...')
+        os._exit(0)
+
+
+if __name__ == '__main__':  # Only when executed as a script, not on import
+    start_server()