diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index 15ebcea..ae5f271 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -30,7 +30,7 @@ jobs: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} run: | docker buildx build \ - --platform=linux/amd64 \ + --platform linux/amd64,linux/arm64 \ --output "type=image,push=true" \ --file ./Dockerfile . \ --tag $(echo "${DOCKER_USERNAME}" | tr '[:upper:]' '[:lower:]')/webmonitor:latest diff --git a/.gitignore b/.gitignore index 9bfd1e2..53e9ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ task/utils/__pycache__/* task/utils/selector/__pycache__/* */__pycache__ .env +ghostdriver.log diff --git a/Dockerfile b/Dockerfile index ff7515a..3d6280b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,54 +1,28 @@ -FROM ubuntu:16.04 +FROM python:3.6-slim-buster ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ENV PORT 5000 ENV USERNAME admin ENV PASSWORD admin +ENV OPENSSL_CONF /etc/ssl/ - -ADD . /app +COPY . 
/app WORKDIR /app -# 安装 python3.6 -RUN apt-get update \ -&& apt-get install gcc -y\ -&& apt-get install g++ -y\ -&& apt-get install gdb -y\ -&& apt-get install libxml2-dev libxslt-dev -y\ -&& apt-get install python-software-properties -y\ -&& apt-get install software-properties-common -y\ -&& apt-get install libffi-dev -y\ -&& apt-get install libssl-dev -y\ -&& add-apt-repository ppa:deadsnakes/ppa -y\ -&& apt-get update \ -&& apt-get install python3.6-dev -y\ -&& apt-get install python3.6 -y\ -&& rm /usr/bin/python\ -&& ln -s /usr/bin/python3.6 /usr/bin/python\ -&& rm /usr/bin/python3\ -&& ln -s /usr/bin/python3.6 /usr/bin/python3\ -&& apt-get install python3-pip -y\ -&& pip3 install pip -U\ -&& rm /usr/bin/pip3 \ -&& ln -s -f /usr/local/bin/pip3 /usr/bin/pip3\ -&& ln -s -f /usr/local/bin/pip3 /usr/bin/pip - -RUN pip install -r requirements.txt - -RUN apt-get update\ -&& apt-get install wget -y\ -&& apt-get install build-essential chrpath libssl-dev libxft-dev -y\ -&& apt-get install libfreetype6 libfreetype6-dev -y\ -&& apt-get install libfontconfig1 libfontconfig1-dev -y\ -&& export PHANTOM_JS="phantomjs-2.1.1-linux-x86_64"\ -&& wget https://github.com/Medium/phantomjs/releases/download/v2.1.1/$PHANTOM_JS.tar.bz2 -O /tmp/$PHANTOM_JS.tar.bz2 \ -&& tar xvjf /tmp/$PHANTOM_JS.tar.bz2 -C /usr/local/share\ -&& ln -sf /usr/local/share/$PHANTOM_JS/bin/phantomjs /usr/local/bin\ -&& rm /tmp/$PHANTOM_JS.tar.bz2 +RUN set -x; buildDeps='wget build-essential' \ +&& apt-get update && apt-get install -y ${buildDeps} \ +chrpath libssl-dev libxft-dev libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ +&& rm -rf /var/lib/apt/lists/* \ +&& export OS_ARCH=$(uname -m) \ +&& wget https://github.com/mjysci/phantomjs/releases/download/v2.1.1/phantomjs-2.1.1-linux_${OS_ARCH}.tar.gz -O /tmp/phantomjs-2.1.1-linux_${OS_ARCH}.tar.gz \ +&& tar -xzvf /tmp/phantomjs-2.1.1-linux_${OS_ARCH}.tar.gz -C /usr/local/bin \ +&& rm /tmp/phantomjs-2.1.1-linux_${OS_ARCH}.tar.gz \ +&& pip 
install -r requirements.txt && pip cache purge \ +&& apt-get purge -y --auto-remove $buildDeps EXPOSE $PORT RUN chmod +x run.sh -CMD ./run.sh $PORT $USERNAME $PASSWORD +CMD ./run.sh $PORT $USERNAME $PASSWORD \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index fb78f77..8d03c03 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,15 +2,20 @@ ![Tests](https://github.com/LogicJake/WebMonitor/workflows/Tests/badge.svg?branch=master&event=push) [![telegram](https://img.shields.io/badge/chat-telegram-brightgreen.svg?style=flat-square)](https://t.me/webmonitor_github) -[中文文档](https://www.logicjake.xyz/WebMonitor) | [English Document](https://www.logicjake.xyz/WebMonitor/#/en/) | [Telegram Group](https://t.me/webmonitor_github) +[中文文档](https://logicjake.github.io/WebMonitor) | [English Document](https://logicjake.github.io/WebMonitor/#/en/) | [Telegram Group](https://t.me/webmonitor_github) ## 特性 * 支持requests请求网页,支持使用PhantomJS抓取异步加载的网页 * 支持 xpath 和 css selector 选择器,支持 JsonPath 提取 json 数据 -* 支持邮件,pushover 和微信提醒(support by server酱) +* 支持邮件,pushover,微信提醒(support by server酱),Bark推送,自定义GET/POST通知, Slack 通知以及 Telegram 通知 +* 支持一个任务多个选择器提取信息 +* 支持自定义消息模板 * 简洁的UI,可视化操作 * 支持自定义请求头,抓取需要登录的网页 * 支持设置监控规则 * 监控RSS更新 -* 数据导入导出 \ No newline at end of file +* 数据导入导出 + +## Buy Me a Coffee +![](fig/donate_wechat.jpg) diff --git a/docs/_sidebar.md b/docs/_sidebar.md index cdd5f27..bd0025f 100644 --- a/docs/_sidebar.md +++ b/docs/_sidebar.md @@ -4,4 +4,4 @@ * [更新日志](changelog.md) * **Links** * [Github](https://github.com/LogicJake/WebMonitor) -* [Blog](https://www.logicjake.xyz) +* [Blog](https://logicjake.github.io) diff --git a/docs/changelog.md b/docs/changelog.md index 6cd7908..81137f8 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,4 +1,25 @@ ## changelog + +### 2021.2.2 +* 添加 Telegram 通知方式 +### 2021.2.1 +* 添加系统保留选择器名称:url +* 添加 Slack 通知方式 + +### 2021.1.31 +* **支持同时设置多个元素选择器,并按照自定义消息模板发送提醒消息** +* 修复规则 increase 和 decrease 在首次抓取时的错误 +* 
添加新监控规则:-without + +### 2020.12.21 +* 添加自定义 GET 和 POST 通知方式(#50) + +### 2020.12.20 +* 添加 bark 通知方式(#48) + +### 2020.12.11 +* docker 支持 arm64 系统(#45) + ### 2020.8.14 * 正则表达式,规则和元素选择器最大长度设为500 * 支持多规则 diff --git a/docs/fig/donate_wechat.jpg b/docs/fig/donate_wechat.jpg new file mode 100644 index 0000000..28ff5e1 Binary files /dev/null and b/docs/fig/donate_wechat.jpg differ diff --git a/docs/fig/rss.png b/docs/fig/rss.png index 510fa90..8502ff4 100644 Binary files a/docs/fig/rss.png and b/docs/fig/rss.png differ diff --git a/docs/fig/rss_setting.png b/docs/fig/rss_setting.png index 4b8a360..07e646f 100644 Binary files a/docs/fig/rss_setting.png and b/docs/fig/rss_setting.png differ diff --git a/docs/fig/status.png b/docs/fig/status.png index 3f9db2d..191143c 100644 Binary files a/docs/fig/status.png and b/docs/fig/status.png differ diff --git a/docs/fig/task_manage.png b/docs/fig/task_manage.png index cd70006..8e437d3 100644 Binary files a/docs/fig/task_manage.png and b/docs/fig/task_manage.png differ diff --git a/docs/fig/task_setting.png b/docs/fig/task_setting.png index dba4f3d..c66ca29 100644 Binary files a/docs/fig/task_setting.png and b/docs/fig/task_setting.png differ diff --git a/docs/how.md b/docs/how.md index 44b93e1..c59a8f0 100644 --- a/docs/how.md +++ b/docs/how.md @@ -1,13 +1,39 @@ ## 设置通知方式 -支持三种通知方式:邮件,pushover 和 Server 酱的微信提醒。邮件提醒只需要设置接收邮箱,微信提醒需要申请 SCKEY,自行搜索 Server 酱注册,简单免费。Pushover 需要填写注册就得到的 User Key。 +支持7种通知方式:邮件,pushover, Server 酱的微信提醒,Bark,自定义GET/POST通知, Slack 通知以及 Telegram 通知。邮件提醒只需要设置接收邮箱,微信提醒需要申请 SCKEY,自行搜索 Server 酱注册,简单免费。Pushover 需要填写注册就得到的 User Key。Bark需要安装[客户端](https://github.com/Finb/Bark)取得对应设备Key。Slack 需要填写“#”开头的 channel 名称,且需要保证 Slack app 已在该 channel 中。 ### 设置系统邮箱 -如果采用邮件提醒,则必须设置系统邮箱,该邮箱为提醒邮件的发信人。自行根据需要使用的邮箱查找相关设置,密码一般指授权码。 +如果采用邮件提醒,则必须设置“系统管理/系统邮箱”,该邮箱为提醒邮件的发信人。自行根据需要使用的邮箱查找相关设置,密码一般指授权码。 系统邮箱配置只需设置一个,多于一个默认只生效第一条。 ### 设置 Pushover Application -如果采用 Pushover 提醒,则必须设置 Pushover api token。 +如果采用 Pushover 提醒,则必须设置“系统管理/Pushover 设置”中的 
Pushover api token。 + +### 设置 Slack +如果采用 Slack 提醒,则必须设置“系统管理/Slack 设置”中的 Slack OAuth Access Token。具体教程见:https://github.com/slackapi/python-slack-sdk/blob/main/tutorial/01-creating-the-slack-app.md + +### 设置 Telegram Bot +如果采用 Telegram 提醒,则必须设置“系统管理/Telegram Bot 设置”中的 Telegram Bot Token。 + +### 设置自定义GET/POST通知 +如果采用自定义通知,则必须设置自定义网址。 +#### GET +用`{header}`和`{content}`替换掉标题和内容的位置。以Bark为例,格式如下: +``` +https://api.day.app/yourkey/{header}/{content} +``` +#### POST +`发送网址{data=}`。将要发送的`body`内容放在`{data=}`内,其中`{header}`和`{content}`替换掉标题和内容的位置。以WxPusher为例,格式如下: +``` +http://wxpusher.zjiecode.com/api/send/message{data={ + "appToken":"AT_xxx", + "content":{content}, + "summary":{header}, + "contentType":3, + "uids":["UID_xxxx"], + "url":"http://wxpusher.zjiecode.com" +}} +``` ## 添加网页监控任务 在 任务管理 > 网页监控管理 添加新任务 @@ -15,11 +41,20 @@ * 必须选择一种通知方式 * 默认抓取频率为5分钟,自行根据需要调整,单位分钟,不建议调太快,以防反爬 -![任务管理](./fig/task_manage.png) -![添加任务](./fig/task_setting.png) ### 选择器 -元素选择器类型可以选择 Xpath, Css selector 或 JsonPath,可以借助浏览器 F12 直接 copy 前两种选择器,需要注意的是,往往浏览器 copy 得到是元素,而不是文本信息,需要做以下补充: +元素选择器类型可以选择 Xpath, Css selector 或 JsonPath。 + +一行一个元素选择器,每一行的格式为:选择器名称{选择器内容},例如: +``` +title{//*[@id="id3"]/h3/text()} +myurl{//*[@id="id3"]/h3/text()} +``` + +```以下字段为系统默认保留字段,请不要使用且无法被覆盖:``` +* url:该任务对应的监控网址 + +可以借助浏览器 F12 直接 copy 前两种选择器,需要注意的是,往往浏览器 copy 得到是元素,而不是文本信息,需要做以下补充: #### xpath * 获取元素文本信息,在浏览器得到的选择器后加```/text()```,如 @@ -40,6 +75,19 @@ #### JsonPath 针对返回 json 数据的接口, 可以使用 JsonPath 提取数据, 具体教程参考 https://goessner.net/articles/JsonPath/ +在Chrome F12开发者工具中,也可以找到对应元素,然后右键该元素,选择“Copy Property Path”。 + +### 消息体模板 +消息体模板可为空,如果为空,则按照元素选择器的定义顺序以制表符为间隔拼接为字符串。下面介绍消息体模板的使用方式,如果元素选择器的设置为: +``` +title{//*[@id="id3"]/h3/text()} +myurl{//*[@id="id3"]/h3/text()} +``` +则消息体模板可以设置为: +``` +{title}的网址是{myurl} +``` +如果title对应的元素选择器提取的内容为“WebMonitor真棒”,myurl对应的元素选择器提取的内容为“https://www.logicjake.xyz/WebMonitor”,则得到的消息内容为“WebMonitor真棒的网址是https://www.logicjake.xyz/WebMonitor”。 ### 是否选择无头浏览器 如果源网页没有异步加载,可以不使用无头浏览器获取网页 @@ -56,6 +104,11 
@@ 规则格式:-规则 参数 支持以下规则: +#### -without +如:文本发生变化且文本内容不包含```上架``` +``` +-without 上架 +``` #### -contain 如:文本发生变化且文本内容包含```上架``` ``` @@ -104,9 +157,6 @@ ## 添加RSS监控任务 可以在 任务管理 > RSS监控任务管理 添加新RSS监控任务 -![RSS](./fig/rss.png) - -![RSS设置](./fig/rss_setting.png) ## 任务状态查看 可以在任务状态栏目下查看所有任务,包括任务状态(run or stop),上次运行时间,上次运行结果,运行结果包括三类: @@ -115,9 +165,8 @@ * 成功执行但未监测到变化 * 出错显示异常信息 -![任务状态](./fig/status.png) 可以通过修改任务状态,暂停或重启任务 ## 数据导入导出 -***WARNING: 网页监控任务和RSS监控任务的通知方式是通过外键与通知方式表连接,在数据表发生变化的情况下,外键id可能失效或无法和导出时保持一致,建议每次导入任务数据后检查通知方式是否正常。*** \ No newline at end of file +***WARNING: 网页监控任务和RSS监控任务的通知方式是通过外键与通知方式表连接,在数据表发生变化的情况下,外键id可能失效或无法和导出时保持一致,建议每次导入任务数据后检查通知方式是否正常。*** diff --git a/requirements.txt b/requirements.txt index 997dbf2..9ef6c6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ jsonpath==0.82 django==2.2.13 django-simpleui==3.9.1 django-apscheduler==0.3.0 -django-import-export==2.0.2 \ No newline at end of file +django-import-export==2.0.2 +slack-sdk==3.2.1 \ No newline at end of file diff --git a/setting/admin.py b/setting/admin.py index f85f3a1..c71db0a 100644 --- a/setting/admin.py +++ b/setting/admin.py @@ -1,8 +1,9 @@ +from setting.views import log_view from django.contrib import admin from import_export import resources from import_export.admin import ImportExportModelAdmin -from .models import Notification, PushoverSetting, SystemMailSetting +from .models import Notification, PushoverSetting, SystemMailSetting, Log, SlackSetting, TelegramSetting class SystemMailSettingResource(resources.ModelResource): @@ -74,3 +75,43 @@ class NotificationAdmin(ImportExportModelAdmin): list_display_links = None actions_on_top = True + + +@admin.register(Log) +class FeedbackStatsAdmin(admin.ModelAdmin): + def changelist_view(self, request, extra_content=None): + return log_view(request) + + +class SlackSettingResource(resources.ModelResource): + class Meta: + model = SlackSetting + skip_unchanged = True + report_skipped = True + + 
+@admin.register(SlackSetting) +class SlackSettingAdmin(admin.ModelAdmin): + resource_class = SlackSettingResource + + list_display = ['token'] + list_editable = ('token', ) + + list_display_links = None + + +class TelegramSettingResource(resources.ModelResource): + class Meta: + model = TelegramSetting + skip_unchanged = True + report_skipped = True + + +@admin.register(TelegramSetting) +class TelegramSettingAdmin(admin.ModelAdmin): + resource_class = TelegramSettingResource + + list_display = ['token'] + list_editable = ('token', ) + + list_display_links = None diff --git a/setting/apps.py b/setting/apps.py index 2dce49b..0c63129 100644 --- a/setting/apps.py +++ b/setting/apps.py @@ -3,4 +3,4 @@ class SettingConfig(AppConfig): name = 'setting' - verbose_name = '设置' + verbose_name = '系统管理' diff --git a/setting/migrations/0001_initial.py b/setting/migrations/0001_initial.py index 49b06ce..388de9c 100644 --- a/setting/migrations/0001_initial.py +++ b/setting/migrations/0001_initial.py @@ -16,7 +16,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('name', models.CharField(default='默认名称', max_length=32, unique=True, verbose_name='通知方式名称')), - ('type', models.IntegerField(choices=[(0, '邮箱'), (1, '微信'), (2, 'pushover')], default='邮箱', verbose_name='通知方式类型')), + ('type', models.IntegerField(choices=[(0, '邮箱'), (1, '微信'), (2, 'pushover'), (3, 'Bark'), (4, '自定义通知')], default='邮箱', verbose_name='通知方式类型')), ('content', models.CharField(max_length=100, verbose_name='通知方式')), ], options={ diff --git a/setting/migrations/0002_auto_20210131_1925.py b/setting/migrations/0002_auto_20210131_1925.py new file mode 100644 index 0000000..e7d726d --- /dev/null +++ b/setting/migrations/0002_auto_20210131_1925.py @@ -0,0 +1,28 @@ +# Generated by Django 2.2.13 on 2021-01-31 19:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + 
('setting', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Log', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + options={ + 'verbose_name': '日志查看', + 'verbose_name_plural': '日志查看', + }, + ), + migrations.AlterField( + model_name='notification', + name='content', + field=models.CharField(max_length=512, verbose_name='邮箱地址 / Server 酱 SCKEY / Pushover User Key / Bark key / 自定义网址'), + ), + ] diff --git a/setting/migrations/0003_auto_20210201_2104.py b/setting/migrations/0003_auto_20210201_2104.py new file mode 100644 index 0000000..066d9b8 --- /dev/null +++ b/setting/migrations/0003_auto_20210201_2104.py @@ -0,0 +1,29 @@ +# Generated by Django 2.2.13 on 2021-02-01 21:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('setting', '0002_auto_20210131_1925'), + ] + + operations = [ + migrations.CreateModel( + name='SlackBotSetting', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('token', models.CharField(max_length=100, verbose_name='Slack OAuth Access Token')), + ], + options={ + 'verbose_name': 'Slack Bot 设置', + 'verbose_name_plural': 'Slack Bot 设置', + }, + ), + migrations.AlterField( + model_name='notification', + name='content', + field=models.CharField(max_length=512, verbose_name='邮箱地址 / Server 酱 SCKEY / Pushover User Key / Bark key / 自定义网址 / Slack channel'), + ), + ] diff --git a/setting/migrations/0004_auto_20210201_2117.py b/setting/migrations/0004_auto_20210201_2117.py new file mode 100644 index 0000000..3439d88 --- /dev/null +++ b/setting/migrations/0004_auto_20210201_2117.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2021-02-01 21:17 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('setting', '0003_auto_20210201_2104'), + ] + + operations = [ + 
migrations.AlterField( + model_name='notification', + name='type', + field=models.IntegerField(choices=[(0, '邮箱'), (1, '微信'), (2, 'pushover'), (3, 'Bark'), (4, '自定义通知'), (5, 'Slack bot')], default='邮箱', verbose_name='通知方式类型'), + ), + ] diff --git a/setting/migrations/0005_auto_20210201_2126.py b/setting/migrations/0005_auto_20210201_2126.py new file mode 100644 index 0000000..4ecd377 --- /dev/null +++ b/setting/migrations/0005_auto_20210201_2126.py @@ -0,0 +1,26 @@ +# Generated by Django 2.2.13 on 2021-02-01 21:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('setting', '0004_auto_20210201_2117'), + ] + + operations = [ + migrations.RenameModel( + old_name='SlackBotSetting', + new_name='SlackSetting', + ), + migrations.AlterModelOptions( + name='slacksetting', + options={'verbose_name': 'Slack 设置', 'verbose_name_plural': 'Slack 设置'}, + ), + migrations.AlterField( + model_name='notification', + name='type', + field=models.IntegerField(choices=[(0, '邮箱'), (1, '微信'), (2, 'pushover'), (3, 'Bark'), (4, '自定义通知'), (5, 'Slack')], default='邮箱', verbose_name='通知方式类型'), + ), + ] diff --git a/setting/migrations/0006_auto_20210203_1749.py b/setting/migrations/0006_auto_20210203_1749.py new file mode 100644 index 0000000..b24b493 --- /dev/null +++ b/setting/migrations/0006_auto_20210203_1749.py @@ -0,0 +1,34 @@ +# Generated by Django 2.2.13 on 2021-02-03 17:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('setting', '0005_auto_20210201_2126'), + ] + + operations = [ + migrations.CreateModel( + name='TelegramSetting', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('token', models.CharField(max_length=100, verbose_name='Telegram Bot Token')), + ], + options={ + 'verbose_name': 'Telegram Bot 设置', + 'verbose_name_plural': 'Telegram Bot 设置', + }, + ), + migrations.AlterField( + 
model_name='notification', + name='content', + field=models.CharField(max_length=512, verbose_name='邮箱地址 / Server 酱 SCKEY / Pushover User Key / Bark key / 自定义网址 / Slack channel / Telegram chat_id'), + ), + migrations.AlterField( + model_name='notification', + name='type', + field=models.IntegerField(choices=[(0, '邮箱'), (1, '微信'), (2, 'pushover'), (3, 'Bark'), (4, '自定义通知'), (5, 'Slack'), (6, 'Telegram')], default='邮箱', verbose_name='通知方式类型'), + ), + ] diff --git a/setting/models.py b/setting/models.py index 38bbed2..e9bb55e 100644 --- a/setting/models.py +++ b/setting/models.py @@ -42,7 +42,8 @@ def __str__(self): class Notification(models.Model): - type_choice = ((0, '邮箱'), (1, '微信'), (2, 'pushover')) + type_choice = ((0, '邮箱'), (1, '微信'), (2, 'pushover'), (3, 'Bark'), + (4, '自定义通知'), (5, 'Slack'), (6, 'Telegram')) name = models.CharField(max_length=32, null=False, verbose_name='通知方式名称', @@ -52,7 +53,11 @@ class Notification(models.Model): choices=type_choice, default='邮箱', verbose_name='通知方式类型') - content = models.CharField(max_length=100, null=False, verbose_name='通知方式') + content = models.CharField(max_length=512, + null=False, + verbose_name='邮箱地址 / Server 酱 SCKEY / \ + Pushover User Key / Bark key / 自定义网址 / Slack channel / Telegram chat_id' + ) class Meta: verbose_name = "通知方式" @@ -60,3 +65,35 @@ class Meta: def __str__(self): return self.name + + +class Log(models.Model): + class Meta: + verbose_name = "日志查看" + verbose_name_plural = "日志查看" + + +class SlackSetting(models.Model): + token = models.CharField(max_length=100, + null=False, + verbose_name='Slack OAuth Access Token') + + class Meta: + verbose_name = "Slack 设置" + verbose_name_plural = "Slack 设置" + + def __str__(self): + return 'Slack ' + self.token + + +class TelegramSetting(models.Model): + token = models.CharField(max_length=100, + null=False, + verbose_name='Telegram Bot Token') + + class Meta: + verbose_name = "Telegram Bot 设置" + verbose_name_plural = "Telegram Bot 设置" + + def __str__(self): + 
return 'Telegram Bot ' + self.token diff --git a/setting/static/css/log.css b/setting/static/css/log.css new file mode 100644 index 0000000..63d44dc --- /dev/null +++ b/setting/static/css/log.css @@ -0,0 +1,7 @@ +.log { + width: 100%; + height: 500px; + overflow: scroll; + overflow-x: hidden; + white-space: pre-line; +} \ No newline at end of file diff --git a/setting/static/js/log.js b/setting/static/js/log.js new file mode 100644 index 0000000..08e6a25 --- /dev/null +++ b/setting/static/js/log.js @@ -0,0 +1,4 @@ +window.onload = function () { + var hid = document.getElementById('msg_end'); + hid.scrollIntoView(false); +} diff --git a/setting/templates/log.html b/setting/templates/log.html new file mode 100644 index 0000000..4d0698e --- /dev/null +++ b/setting/templates/log.html @@ -0,0 +1,14 @@ +{% extends "admin/base_site.html" %} +{% load static %} + + +{% block content %} + + + +
+

{{ content }}

+ +
+ +{% endblock %} \ No newline at end of file diff --git a/setting/views.py b/setting/views.py index 91ea44a..21e8aba 100644 --- a/setting/views.py +++ b/setting/views.py @@ -1,3 +1,16 @@ from django.shortcuts import render +import os + # Create your views here. +def log_view(request): + BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + log_path = os.path.join(BASE_DIR, 'static', 'log', 'log.txt') + + content = '日志文件不存在' + if os.path.exists(log_path): + with open(log_path, 'r') as f: + content = f.readlines()[-50:] + content = ''.join(content) + return render(request, 'log.html', {'content': content}) diff --git a/task/migrations/0004_auto_20210131_1925.py b/task/migrations/0004_auto_20210131_1925.py new file mode 100644 index 0000000..b55f3d2 --- /dev/null +++ b/task/migrations/0004_auto_20210131_1925.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2021-01-31 19:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('task', '0003_auto_20200814_1333'), + ] + + operations = [ + migrations.AlterField( + model_name='task', + name='selector', + field=models.TextField(verbose_name='元素选择器'), + ), + ] diff --git a/task/migrations/0005_task_template.py b/task/migrations/0005_task_template.py new file mode 100644 index 0000000..969a11c --- /dev/null +++ b/task/migrations/0005_task_template.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2021-01-31 20:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('task', '0004_auto_20210131_1925'), + ] + + operations = [ + migrations.AddField( + model_name='task', + name='template', + field=models.TextField(blank=True, verbose_name='消息体模板'), + ), + ] diff --git a/task/migrations/0006_auto_20210201_1755.py b/task/migrations/0006_auto_20210201_1755.py new file mode 100644 index 0000000..afb703c --- /dev/null +++ b/task/migrations/0006_auto_20210201_1755.py @@ -0,0 +1,23 @@ +# 
Generated by Django 2.2.13 on 2021-02-01 17:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('task', '0005_task_template'), + ] + + operations = [ + migrations.AlterField( + model_name='task', + name='selector', + field=models.TextField(help_text='一行一个元素选择器,每一行的格式为:选择器名称{选择器内容},例如:title{//*[@id="id3"]/h3/text()}。其中 url 为系统保留选择器名称,请不要使用且无法被覆盖', verbose_name='元素选择器'), + ), + migrations.AlterField( + model_name='task', + name='template', + field=models.TextField(blank=True, help_text='可为空,自定义发送的通知内容格式,按照选择器名称进行替换,具体示例见文档', verbose_name='消息体模板'), + ), + ] diff --git a/task/migrations/0007_auto_20210203_1817.py b/task/migrations/0007_auto_20210203_1817.py new file mode 100644 index 0000000..3d711b7 --- /dev/null +++ b/task/migrations/0007_auto_20210203_1817.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2021-02-03 18:17 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('task', '0006_auto_20210201_1755'), + ] + + operations = [ + migrations.AlterField( + model_name='task', + name='selector', + field=models.TextField(help_text='一行一个元素选择器,每一行的格式为:选择器名称{选择器内容}, 例如:title{//*[@id="id3"]/h3/text()}。其中 url 为系统保留选择器名称,请不要使用且无法被覆盖', verbose_name='元素选择器'), + ), + ] diff --git a/task/migrations/0008_auto_20210314_1924.py b/task/migrations/0008_auto_20210314_1924.py new file mode 100644 index 0000000..74e085a --- /dev/null +++ b/task/migrations/0008_auto_20210314_1924.py @@ -0,0 +1,24 @@ +# Generated by Django 2.2.13 on 2021-03-14 19:24 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('task', '0007_auto_20210203_1817'), + ] + + operations = [ + migrations.AlterField( + model_name='rsstask', + name='url', + field=models.CharField(max_length=1000, validators=[django.core.validators.URLValidator()], verbose_name='RSS地址'), + ), + migrations.AlterField( + 
model_name='task', + name='url', + field=models.CharField(max_length=1000, validators=[django.core.validators.URLValidator()], verbose_name='监控网址'), + ), + ] diff --git a/task/models.py b/task/models.py index e220ab2..6be507e 100644 --- a/task/models.py +++ b/task/models.py @@ -100,7 +100,7 @@ def short_last_status(self): class Task(models.Model): name = models.CharField(max_length=100, verbose_name='任务名称', null=False) - url = models.CharField(max_length=500, + url = models.CharField(max_length=1000, verbose_name='监控网址', null=False, validators=[URLValidator()]) @@ -115,11 +115,15 @@ class Task(models.Model): null=False, default='Xpath', choices=selector_choices) - selector = models.CharField(max_length=500, - verbose_name='元素选择器', - null=False) + selector = models.TextField(verbose_name='元素选择器', + blank=False, + help_text='一行一个元素选择器,每一行的格式为:选择器名称{选择器内容},\ + 例如:title{//*[@id="id3"]/h3/text()}。其中 url 为系统保留选择器名称,请不要使用且无法被覆盖') + template = models.TextField( + verbose_name='消息体模板', + blank=True, + help_text='可为空,自定义发送的通知内容格式,按照选择器名称进行替换,具体示例见文档') is_chrome_choices = ((0, 'no'), (1, 'yes')) - is_chrome = models.IntegerField(null=False, default='no', verbose_name='是否使用无头浏览器', @@ -197,7 +201,7 @@ def delete(self, *args, **kwargs): class RSSTask(models.Model): name = models.CharField(max_length=32, null=False, verbose_name='任务名称') - url = models.CharField(max_length=500, + url = models.CharField(max_length=1000, null=False, verbose_name='RSS地址', validators=[URLValidator()]) diff --git a/task/utils/extract_info.py b/task/utils/extract_info.py index e3791ff..0a49365 100644 --- a/task/utils/extract_info.py +++ b/task/utils/extract_info.py @@ -1,8 +1,9 @@ +import logging import re +from collections import OrderedDict import feedparser from func_timeout import func_set_timeout -import logging from task.utils.selector.selector_handler import new_handler logger = logging.getLogger('main') @@ -12,18 +13,31 @@ def extract_by_re(conetnt, regular_expression): m = 
re.search(regular_expression, conetnt) if m: - return m.group() - elif m == None: + return m.group(1) + elif not m: return "未检测到相关内容" else: logger.error('{} 无法使用正则提取'.format(regular_expression)) raise Exception('无法使用正则提取') +def wrap_template_content(content_dict, content_template): + if content_template == '': + content_template = '\t'.join( + ['{' + k + '}' for k in content_dict.keys()]) + + for k, v in content_dict.items(): + content_template = content_template.replace('{' + k + '}', v) + + content = content_template + return content + + def get_content(url, is_chrome, selector_type, selector, + content_template, regular_expression=None, headers=None, debug=False): @@ -32,16 +46,35 @@ def get_content(url, else: selector_handler = new_handler('phantomjs', debug) + # 兼容旧版本,默认转为{content} + selector_dict = OrderedDict() + if '{' not in selector: + selector_dict['content'] = selector + else: + selector_split_list = selector.split('\n') + for selector_split in selector_split_list: + selector_split = selector_split.strip() + key, value = selector_split.split('{') + value = value.split('}')[0] + selector_dict[key] = value + if selector_type == 0: - content = selector_handler.get_by_xpath(url, selector, headers) + content_dict = selector_handler.get_by_xpath(url, selector_dict, + headers) elif selector_type == 1: - content = selector_handler.get_by_css(url, selector, headers) + content_dict = selector_handler.get_by_css(url, selector_dict, headers) elif selector_type == 2: - content = selector_handler.get_by_json(url, selector, headers) + content_dict = selector_handler.get_by_json(url, selector_dict, + headers) else: logger.error('无效选择器') raise Exception('无效选择器') + # 添加或替换保留字段:{url} + if 'url' in content_dict: + content_dict['url'] = url + content = wrap_template_content(content_dict, content_template) + if regular_expression: content = extract_by_re(content, regular_expression) return content diff --git a/task/utils/notification/bark_notification.py 
b/task/utils/notification/bark_notification.py new file mode 100644 index 0000000..549c730 --- /dev/null +++ b/task/utils/notification/bark_notification.py @@ -0,0 +1,36 @@ +import json +import logging +import re +import requests + +from task.utils.notification.notification import Notification +import urllib.parse + +logger = logging.getLogger('main') + + +def getUrlQuery(content): + """ + Extract the first URL in the content with format of '?url=URL', return '' if none URL found. + """ + regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))" + urls = re.findall(regex, content) + if len(urls): + url = [x[0] for x in urls][0] + url_query = f'?url={urllib.parse.quote_plus(url)}' + return url_query + return '' + + +class BarkNotification(Notification): + def send(self, to, header, content): + if to == '默认': + logger.error('没有设置Bark KEY,无法发送Bark通知') + raise Exception('没有设置Bark KEY,无法发送Bark通知') + url = 'https://api.day.app/{}/{}/{}{}'.format( + to, header, urllib.parse.quote_plus(content), getUrlQuery(content)) + r = requests.post(url) + + res = json.loads(r.text) + if res['code'] != 200: + raise Exception(res['message']) diff --git a/task/utils/notification/custom_notification.py b/task/utils/notification/custom_notification.py new file mode 100644 index 0000000..49de86b --- /dev/null +++ b/task/utils/notification/custom_notification.py @@ -0,0 +1,33 @@ +import json +import logging + +import requests + +from task.utils.notification.notification import Notification +import urllib.parse + +logger = logging.getLogger('main') + + +class CustomNotification(Notification): + def send(self, to, header, content): + if to == '默认': + logger.error('没有设置通知网址,无法发送自定义通知') + raise Exception('没有设置通知网址,无法发送自定义通知') + loc = to.find("{data=") + if loc == -1: + url = to.replace('{header}', + urllib.parse.quote_plus(header)).replace( + '{content}', 
urllib.parse.quote_plus(content)) + r = requests.get(url) + res = json.loads(r.text) + logger.debug('自定义[GET]通知:{},结果:{}'.format(url, res)) + + else: + url = to[:loc] + data = to[loc + 6:to.rfind("}")].replace( + '{header}', + json.dumps(header)).replace('{content}', json.dumps(content)) + r = requests.post(url, json=json.loads(data)) + res = json.loads(r.text) + logger.debug('自定义[POST]通知:{},传输数据:{},结果:{}'.format(url, data, res)) diff --git a/task/utils/notification/notification_handler.py b/task/utils/notification/notification_handler.py index ae8dc60..4440c9f 100644 --- a/task/utils/notification/notification_handler.py +++ b/task/utils/notification/notification_handler.py @@ -1,6 +1,10 @@ from task.utils.notification.mail_notification import MailNotification from task.utils.notification.wechat_notification import WechatNotification from task.utils.notification.pushover_notification import PushoverNotification +from task.utils.notification.bark_notification import BarkNotification +from task.utils.notification.custom_notification import CustomNotification +from task.utils.notification.slack_notification import SlackNotification +from task.utils.notification.telegram_notification import TelegramNotification import logging logger = logging.getLogger('main') @@ -13,6 +17,14 @@ def new_handler(name): return WechatNotification() elif name == 'pushover': return PushoverNotification() + elif name == 'bark': + return BarkNotification() + elif name == 'custom': + return CustomNotification() + elif name == 'slack': + return SlackNotification() + elif name == 'telegram': + return TelegramNotification() else: logger.error('通知方式错误') raise Exception('通知方式错误') diff --git a/task/utils/notification/pushover_notification.py b/task/utils/notification/pushover_notification.py index e602298..442954e 100755 --- a/task/utils/notification/pushover_notification.py +++ b/task/utils/notification/pushover_notification.py @@ -1,10 +1,9 @@ import json import logging -import os import traceback 
import requests - +from requests.exceptions import RequestException from setting.models import PushoverSetting from task.utils.notification.notification import Notification @@ -20,7 +19,7 @@ def __init__(self): logger.error(traceback.format_exc()) raise Exception('没有设置Pushover API Token,无法发送通知') - self.token = setting.token + self.token = setting.api_token def send(self, to, header, content): if to == '默认': @@ -28,7 +27,7 @@ def send(self, to, header, content): raise Exception('没有设置Prushover User Key,无法发送推送通知') token = self.token sendData = { - 'token': token, # 监控猫 Api Token + 'token': token, 'user': to, 'message': '【' + header + '】有更新!\n>>>新内容为:\n' + content, } @@ -36,11 +35,16 @@ def send(self, to, header, content): try: response = requests.post(pushoverApi, sendData, timeout=5) - except requests.exceptions.RequestException as e: - logger.error('请求错误') + except RequestException as e: + logger.error('请求错误', traceback.format_exc()) raise Exception('Error: {}'.format(e)) res = json.loads(response.text) if res['status'] != 1: raise Exception(res['errors']) + elif 'info' in res: + if 'no active devices to send to' in res['info']: + raise Exception('User key 对应的账户无激活设备,需要先行到官网购买 License') + else: + logger.debug(res['info']) diff --git a/task/utils/notification/slack_notification.py b/task/utils/notification/slack_notification.py new file mode 100644 index 0000000..45cd796 --- /dev/null +++ b/task/utils/notification/slack_notification.py @@ -0,0 +1,33 @@ +import logging +import traceback + +from setting.models import SlackSetting +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError +from task.utils.notification.notification import Notification + +logger = logging.getLogger('main') + + +class SlackNotification(Notification): + def __init__(self): + try: + setting = SlackSetting.objects.first() + except Exception: + logger.error('没有设置 Slack OAuth Access Token,无法发送通知') + logger.error(traceback.format_exc()) + raise Exception('没有设置 Slack OAuth 
Access Token,无法发送通知') + + self.token = setting.token + + def send(self, to, header, content): + if to == '默认': + logger.error('没有设置 channel 名称,无法发送 Slack 通知') + raise Exception('没有设置 channel 名称,无法发送 Slack 通知') + client = WebClient(token=self.token) + + try: + client.chat_postMessage(channel=to, + text='{}:{}'.format(header, content)) + except SlackApiError as e: + raise Exception(e.response['error']) diff --git a/task/utils/notification/telegram_notification.py b/task/utils/notification/telegram_notification.py new file mode 100644 index 0000000..9765450 --- /dev/null +++ b/task/utils/notification/telegram_notification.py @@ -0,0 +1,34 @@ +import logging +import traceback +import urllib.parse + +import requests +from setting.models import TelegramSetting +from task.utils.notification.notification import Notification + +logger = logging.getLogger('main') + + +class TelegramNotification(Notification): + def __init__(self): + try: + setting = TelegramSetting.objects.first() + except Exception: + logger.error('没有设置 Telegram bot token,无法发送通知') + logger.error(traceback.format_exc()) + raise Exception('没有设置 Telegram bot token,无法发送通知') + + self.token = setting.token + + def send(self, to, header, content): + if to == '默认': + logger.error('没有设置 chat_id,无法发送 Telegram 通知') + raise Exception('没有设置 chat_id,无法发送 Telegram 通知') + + r = requests.get( + 'https://api.telegram.org/bot{}/sendMessage?chat_id={}&text={}'. 
+ format(self.token, to, + urllib.parse.quote_plus('{}: {}'.format(header, content)))) + result = r.json() + if not result['ok']: + raise Exception(result['description']) diff --git a/task/utils/notification/wechat_notification.py b/task/utils/notification/wechat_notification.py index 63e56be..cde6aa8 100644 --- a/task/utils/notification/wechat_notification.py +++ b/task/utils/notification/wechat_notification.py @@ -15,9 +15,9 @@ def send(self, to, header, content): logger.error('没有设置Server酱 SCKEY,无法发送微信通知') raise Exception('没有设置Server酱 SCKEY,无法发送微信通知') data = {'text': header, 'desp': content} - url = 'https://sc.ftqq.com/{}.send'.format(to) + url = 'https://sctapi.ftqq.com/{}.send'.format(to) r = requests.post(url, data=data) res = json.loads(r.text) - if res['errno'] != 0: - raise Exception(res['errmsg']) + if res['data']['errno'] != 0: + raise Exception(res['data']['errmsg']) diff --git a/task/utils/rule.py b/task/utils/rule.py index 8672575..ac1fbb1 100644 --- a/task/utils/rule.py +++ b/task/utils/rule.py @@ -1,3 +1,18 @@ +def parse_without(args, content, last_content): + ''' + 新内容中是否不包含某个字符串 + -without 上架 + ''' + if args[0] != '-without': + return False + + value = args[1] + + if value not in content: + return True + return False + + def parse_contain(args, content, last_content): ''' 新内容中是否包含某个字符串 @@ -22,6 +37,9 @@ def parse_increase(args, content, last_content): if args[0] != '-increase': return False + if last_content == '': + return False + value = args[1] last_content = float(last_content) @@ -42,6 +60,9 @@ def parse_decrease(args, content, last_content): if args[0] != '-decrease': return False + if last_content == '': + return False + value = args[1] last_content = float(last_content) @@ -111,8 +132,8 @@ def parse_more(args, content, last_content): rule_funs = [ - parse_contain, parse_increase, parse_decrease, parse_equal, parse_less, - parse_more + parse_without, parse_contain, parse_increase, parse_decrease, parse_equal, + parse_less, parse_more ] diff 
--git a/task/utils/scheduler.py b/task/utils/scheduler.py index e3a5952..f3e5ce4 100644 --- a/task/utils/scheduler.py +++ b/task/utils/scheduler.py @@ -28,11 +28,6 @@ def wraper_rss_msg(item): return res -def wraper_msg(content, link): - res = '''[{}]({})'''.format(content, link) - return res - - def send_message(content, header, notifications): if len(notifications) == 0: raise Exception('通知方式为空') @@ -74,6 +69,38 @@ def send_message(content, header, notifications): fail += 1 exception_content += 'Pushover Exception: {};'.format(repr(e)) + try: + if type == 3: + handler = new_handler('bark') + handler.send(notification_detail, header, content) + except Exception as e: + fail += 1 + exception_content += 'Bark Exception: {};'.format(repr(e)) + + try: + if type == 4: + handler = new_handler('custom') + handler.send(notification_detail, header, content) + except Exception as e: + fail += 1 + exception_content += 'Custom Exception: {};'.format(repr(e)) + + try: + if type == 5: + handler = new_handler('slack') + handler.send(notification_detail, header, content) + except Exception as e: + fail += 1 + exception_content += 'Slack Exception: {};'.format(repr(e)) + + try: + if type == 6: + handler = new_handler('telegram') + handler.send(notification_detail, header, content) + except Exception as e: + fail += 1 + exception_content += 'Telegram Exception: {};'.format(repr(e)) + if fail > 0: if fail < total: raise PartNotificationError('监测到变化,部分通知方式发送错误:' + @@ -85,6 +112,7 @@ def send_message(content, header, notifications): def monitor(id, type): status = '' global_content = None + last = None try: if type == 'html': task = Task.objects.get(pk=id) @@ -93,6 +121,7 @@ def monitor(id, type): selector_type = task.selector_type selector = task.selector is_chrome = task.is_chrome + content_template = task.template notifications = [i for i in task.notification.iterator()] @@ -107,7 +136,8 @@ def monitor(id, type): last_content = last.content content = get_content(url, is_chrome, 
selector_type, selector, - regular_expression, headers) + content_template, regular_expression, + headers) global_content = content status_code = is_changed(rule, content, last_content) logger.info( @@ -119,14 +149,12 @@ def monitor(id, type): last.save() elif status_code == 2: status = '监测到变化,且命中规则,最新值为{}'.format(content) - msg = wraper_msg(content, url) - send_message(msg, name, notifications) + send_message(content, name, notifications) last.content = content last.save() elif status_code == 3: status = '监测到变化,最新值为{}'.format(content) - msg = wraper_msg(content, url) - send_message(msg, name, notifications) + send_message(content, name, notifications) last.content = content last.save() elif status_code == 0: @@ -174,6 +202,7 @@ def monitor(id, type): def add_job(id, interval, type='html'): + task_id = '' if type == 'html': task_id = id elif type == 'rss': @@ -195,6 +224,8 @@ def add_job(id, interval, type='html'): def remove_job(id, type='html'): + task_id = '' + if type == 'html': task_id = id elif type == 'rss': diff --git a/task/utils/selector/phantomjs_selector.py b/task/utils/selector/phantomjs_selector.py index 43eddd3..ba10e1f 100644 --- a/task/utils/selector/phantomjs_selector.py +++ b/task/utils/selector/phantomjs_selector.py @@ -1,13 +1,14 @@ import ast import warnings +from collections import OrderedDict -from scrapy.selector import Selector from selenium import webdriver - -from task.utils.selector.selector import Selector as FatherSelector +from task.utils.selector.selector import SelectorABC as FatherSelector warnings.filterwarnings("ignore") +USERAGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36' + class PhantomJSSelector(FatherSelector): def __init__(self, debug=False): @@ -16,7 +17,7 @@ def __init__(self, debug=False): def get_html(self, url, headers): # 默认userAgent webdriver.DesiredCapabilities.PHANTOMJS[ - 'phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (Windows NT 10.0; 
WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36' + 'phantomjs.page.settings.userAgent'] = USERAGENT if headers: header_dict = ast.literal_eval(headers) @@ -43,26 +44,29 @@ def get_html(self, url, headers): driver.quit() return html - def get_by_xpath(self, url, xpath, headers=None): + def get_by_xpath(self, url, selector_dict, headers=None): html = self.get_html(url, headers) - if 'string()' in xpath: - xpath = xpath.split('/') - xpath = '/'.join(xpath[:-1]) - res = Selector( - text=html).xpath(xpath)[0].xpath('string(.)').extract() - else: - res = Selector(text=html).xpath(xpath).extract() - - if len(res) != 0: - return res[0] - else: - raise Exception('无法获取文本信息') - - def get_by_css(self, url, xpath, headers=None): + + result = OrderedDict() + for key, xpath_ext in selector_dict.items(): + result[key] = self.xpath_parse(html, xpath_ext) + + return result + + def get_by_css(self, url, selector_dict, headers=None): html = self.get_html(url, headers) - res = Selector(text=html).css(xpath).extract() - if len(res) != 0: - return res[0] - else: - raise Exception('无法获取文本信息') + result = OrderedDict() + for key, css_ext in selector_dict.items(): + result[key] = self.css_parse(html, css_ext) + + return result + + def get_by_json(self, url, selector_dict, headers=None): + html = self.get_html(url, headers) + + result = OrderedDict() + for key, json_ext in selector_dict.items(): + result[key] = self.json_parse(html, json_ext) + + return result diff --git a/task/utils/selector/request_selector.py b/task/utils/selector/request_selector.py index 515249b..dbc452b 100644 --- a/task/utils/selector/request_selector.py +++ b/task/utils/selector/request_selector.py @@ -1,12 +1,8 @@ import ast +from collections import OrderedDict import requests -import json -import jsonpath - -from scrapy.selector import Selector - -from task.utils.selector.selector import Selector as FatherSelector +from task.utils.selector.selector import SelectorABC as FatherSelector 
class RequestsSelector(FatherSelector): @@ -26,40 +22,29 @@ def get_html(self, url, headers): html = r.text return html - def get_by_xpath(self, url, xpath, headers=None): + def get_by_xpath(self, url, selector_dict, headers=None): html = self.get_html(url, headers) - if 'string()' in xpath: - xpath = xpath.split('/') - xpath = '/'.join(xpath[:-1]) - res = Selector( - text=html).xpath(xpath)[0].xpath('string(.)').extract() - else: - res = Selector(text=html).xpath(xpath).extract() - if len(res) != 0: - return res[0] - else: - raise Exception('无法获取文本信息') + result = OrderedDict() + for key, xpath_ext in selector_dict.items(): + result[key] = self.xpath_parse(html, xpath_ext) - def get_by_css(self, url, xpath, headers=None): + return result + + def get_by_css(self, url, selector_dict, headers=None): html = self.get_html(url, headers) - res = Selector(text=html).css(xpath).extract() - if len(res) != 0: - return res[0] - else: - raise Exception('无法获取文本信息') + result = OrderedDict() + for key, css_ext in selector_dict.items(): + result[key] = self.css_parse(html, css_ext) - def get_by_json(self, url, xpath, headers=None): + return result + + def get_by_json(self, url, selector_dict, headers=None): html = self.get_html(url, headers) - try: - resJson = json.loads(html) - except Exception: - raise Exception('Json转换错误') - res = json.dumps(jsonpath.jsonpath(resJson, xpath), ensure_ascii=False) + result = OrderedDict() + for key, json_ext in selector_dict.items(): + result[key] = self.json_parse(html, json_ext) - if len(res) != 0: - return res - else: - raise Exception('无法获取文本信息') + return result diff --git a/task/utils/selector/selector.py b/task/utils/selector/selector.py index f1707c1..22286da 100644 --- a/task/utils/selector/selector.py +++ b/task/utils/selector/selector.py @@ -5,12 +5,51 @@ @Date: 2019-03-25 12:23:59 @LastEditTime: 2020-03-01 14:54:14 ''' +import json from abc import ABCMeta, abstractmethod +import jsonpath +from scrapy.selector import Selector -class 
Selector(): + +class SelectorABC(): __metaclass__ = ABCMeta + def xpath_parse(self, html, xpath_ext): + if 'string()' in xpath_ext: + xpath_ext = xpath_ext.split('/') + xpath_ext = '/'.join(xpath_ext[:-1]) + res = Selector( + text=html).xpath(xpath_ext)[0].xpath('string(.)').extract() + else: + res = Selector(text=html).xpath(xpath_ext).extract() + + if len(res) != 0: + return res[0] + else: + raise Exception('无法获取文本信息') + + def css_parse(self, html, css_ext): + res = Selector(text=html).css(css_ext).extract() + + if len(res) != 0: + return res[0] + else: + raise Exception('无法获取文本信息') + + def json_parse(self, html, json_ext): + try: + resJson = json.loads(html) + except Exception: + raise Exception('Json转换错误') + res = json.dumps(jsonpath.jsonpath(resJson, json_ext), + ensure_ascii=False) + + if len(res) != 0: + return res + else: + raise Exception('无法获取文本信息') + @abstractmethod def get_by_xpath(self): pass diff --git a/tests/test_extract_info.py b/tests/test_extract_info.py index 0ccaa1e..ffd26c8 100644 --- a/tests/test_extract_info.py +++ b/tests/test_extract_info.py @@ -13,7 +13,6 @@ def test_re2(self): regular_expression = r'([1-9]\d*)' content = '1391好贵' res = extract_by_re(content, regular_expression) - print(res) self.assertEqual(res, '1391') diff --git a/tests/test_rule.py b/tests/test_rule.py index 2250333..8989f04 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -36,6 +36,19 @@ def test_contains(self): res = is_changed(rule, content, last_content) self.assertEqual(res, 1) + def test_without(self): + rule = '-without 变化' + content = 'abcdas' + last_content = '不变化' + res = is_changed(rule, content, last_content) + self.assertEqual(res, 2) + + rule = '-without 变化' + content = '我发生变化了' + last_content = '不变化' + res = is_changed(rule, content, last_content) + self.assertEqual(res, 1) + def test_increase(self): rule = '-increase 3' content = '1888.1' diff --git a/webmonitor/settings.py b/webmonitor/settings.py index 2eabdde..884d22e 100644 --- 
a/webmonitor/settings.py +++ b/webmonitor/settings.py @@ -42,7 +42,7 @@ ] STATICFILES_DIRS = [ - os.path.join(BASE_DIR, "static"), + os.path.join(BASE_DIR, 'static'), ] MIDDLEWARE = [ @@ -136,18 +136,20 @@ 'system_keep': True, 'menus': [{ - 'name': '文档', + 'name': '使用文档', 'icon': 'fa fa-file', - 'url': 'https://www.logicjake.xyz/WebMonitor/' + 'url': 'https://www.logicjake.xyz/WebMonitor/#/how' }], - 'menu_display': ['Simpleui', '任务管理', '设置', '文档'], + 'menu_display': ['Simpleui', '任务管理', '系统管理', '使用文档'], } SIMPLEUI_ICON = { + '系统管理': 'fas fa-cog', '系统邮箱': 'fas fa-mail-bulk', 'RSS监控管理': 'fas fa-rss', '网页监控管理': 'far fa-file-code', '任务状态': 'far fa-calendar-check', + '日志查看': 'fas fa-book-reader', } SIMPLEUI_ANALYSIS = False