Skip to content

Commit

Permalink
add arxiv push bot
Browse files Browse the repository at this point in the history
  • Loading branch information
Doragd committed May 20, 2023
1 parent 5b10037 commit 91b8b3f
Show file tree
Hide file tree
Showing 87 changed files with 73,496 additions and 73,299 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/push_arxiv_daily.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Workflow: fetch the newest arXiv papers for QUERY, push them through
# ServerChan, and commit the refreshed arxiv.json back to the repository.
name: push_arxiv_daily

env:
  PYTHON_VERSION: "3.8" # set this to the Python version to use
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

on:
  issues:
    types:
      - labeled

  schedule:
    - cron: '0 0 * * *' # fires every day at 00:00 UTC, i.e. 08:00 Shanghai time

jobs:
  daily-push:
    # Run for the daily schedule, or on demand when an issue gets the
    # 'push test' label. A schedule event carries no label, so checking the
    # label alone would skip every cron-triggered run.
    if: github.event_name == 'schedule' || github.event.label.name == 'push test'
    runs-on: ubuntu-latest
    env:
      SERVERCHAN_API_KEY: ${{ secrets.SERVERCHAN_API_KEY }}
      QUERY: "cs.IR"
      THREADS: 2

    permissions:
      issues: write
      contents: write

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python version
        uses: actions/setup-python@v3
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        # arxiv.py imports both tqdm and requests, so install both explicitly.
        run: pip install tqdm requests

      - name: PUSH ARXIV DAILY
        run: python arxiv.py

      - name: Commit files
        id: commit
        run: |
          # NOTE(review): the e-mail value below looks redacted in this copy;
          # confirm the real address before merging.
          git config --local user.email "[email protected]"
          git config --local user.name "github-actions"
          git add --all
          # '[' is a command: it needs spaces around it and before ']'.
          if [ -z "$(git status --porcelain)" ]; then
            echo "push=false" >> "$GITHUB_OUTPUT"
          else
            git commit -m "chore: update confs" -a
            echo "push=true" >> "$GITHUB_OUTPUT"
          fi
        shell: bash

      - name: Push changes
        if: steps.commit.outputs.push == 'true'
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
3 changes: 2 additions & 1 deletion .github/workflows/update_confs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install dependencies
run: pip install openpyxl
run: pip install openpyxl tqdm


- name: Update the cache file from the issue
run: |
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
- [搜广推算法系列串讲](https://github.com/Doragd/Algorithm-Practice-in-Industry/blob/main/%E6%90%9C%E5%B9%BF%E6%8E%A8%E7%AE%97%E6%B3%95%E7%B3%BB%E5%88%97%E4%B8%B2%E8%AE%B2.md)



本文仅做资源收集,未引用具体内容,如有侵权,请联系删除。
* 源文件:source.xlsx,可以执行自定义排序
* 同时还提供了搜广推顶会论文列表
Expand Down
2 changes: 2 additions & 0 deletions arxiv.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[
]
135 changes: 135 additions & 0 deletions arxiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@

'''
credit to original author: Glenn ([email protected])
'''

import os
import requests
import time
import json
import datetime
from tqdm import tqdm


def get_yesterday():
    """Return yesterday's date, in UTC, formatted as ``YYYY-MM-DD``.

    The result is compared against publication dates that this file parses
    from ``...Z`` (UTC) timestamps, so the reference day is computed in UTC
    as well instead of depending on the machine's local time zone.

    Returns:
        str: The UTC calendar date one day before now, e.g. ``'2023-05-19'``.
    """
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    return (utc_now - datetime.timedelta(days=1)).strftime('%Y-%m-%d')


def _parse_arxiv_entry(entry):
    """Turn one Atom ``<entry>`` element into a paper dict, or ``None`` if unusable."""
    atom = '{http://www.w3.org/2005/Atom}'
    title = entry.findtext(f'{atom}title')
    summary = entry.findtext(f'{atom}summary')
    paper_url = entry.findtext(f'{atom}id')
    published = entry.findtext(f'{atom}published')

    # Skip an incomplete entry rather than aborting the whole batch.
    if None in (title, summary, paper_url, published):
        return None

    try:
        pub_date = datetime.datetime.strptime(
            published.strip(), "%Y-%m-%dT%H:%M:%SZ"
        ).strftime("%Y-%m-%d")
    except ValueError:
        return None

    return {
        'title': title.strip(),
        'url': paper_url.strip(),
        'pub_date': pub_date,
        'summary': summary.strip(),
    }


def search_arxiv_papers(search_term, max_results=10):
    """Query the arXiv export API and return the most recently submitted papers.

    Args:
        search_term: Term searched across all fields (sent as ``all:<term>``).
        max_results: Maximum number of entries requested from the API.

    Returns:
        list[dict]: One dict per paper with the keys ``title``, ``url``,
        ``pub_date`` (``YYYY-MM-DD``) and ``summary``. An empty list is
        returned when the API does not answer with HTTP 200, when the
        response is not well-formed XML, or when it contains no entries.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import xml.etree.ElementTree as ET

    # Passing the parameters as a dict lets requests do the URL escaping and
    # avoids hand-assembling the query string.
    response = requests.get(
        'http://export.arxiv.org/api/query',
        params={
            'search_query': f'all:{search_term}',
            'start': 0,
            'max_results': max_results,
            'sortBy': 'submittedDate',
            'sortOrder': 'descending',
        },
        timeout=30,  # never let a stalled connection hang the job
    )

    if response.status_code != 200:
        return []

    try:
        # Parse the raw bytes so the XML declaration decides the encoding.
        root = ET.fromstring(response.content)
    except ET.ParseError:
        return []

    entries = root.findall('{http://www.w3.org/2005/Atom}entry')
    if not entries:
        return []

    print('[+] 开始处理每日最新论文....')

    parsed = (_parse_arxiv_entry(entry) for entry in entries)
    return [paper for paper in parsed if paper is not None]


def send_wechat_message(title, content, SERVERCHAN_API_KEY):
    """Send one notification through the ServerChan push endpoint.

    Args:
        title: Message title.
        content: Message body, sent as the ``desp`` field.
        SERVERCHAN_API_KEY: Send key that forms part of the endpoint URL.

    The response is not inspected; connection errors raised by ``requests``
    propagate to the caller.
    """
    url = f'https://sctapi.ftqq.com/{SERVERCHAN_API_KEY}.send'
    payload = {
        'title': title,
        'desp': content,
    }
    # Send the fields as a form body instead of the query string: paper
    # summaries can be long, and a POST body has no URL-length ceiling.
    requests.post(url, data=payload, timeout=30)


def save_to_local_file(papers, filename='arxiv.json'):
    """Append papers not yet stored (by case-insensitive title) to a JSON file.

    Args:
        papers: Iterable of paper dicts; each must have a ``'title'`` key.
        filename: Path of the JSON file holding a list of paper dicts.
            A missing file is treated as an empty list and then created.

    The file is rewritten with the previous content followed by the newly
    added papers, in their original order. Titles repeated inside ``papers``
    are stored only once.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except FileNotFoundError:
        # First run: nothing stored yet.
        results = []

    seen_titles = {paper['title'].lower() for paper in results}
    for paper in papers:
        key = paper['title'].lower()
        if key in seen_titles:
            continue
        # Record the title immediately so duplicates within this batch are
        # filtered as well, not only duplicates of what is already on disk.
        seen_titles.add(key)
        results.append(paper)

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4, ensure_ascii=False)




def cronjob():
    """Run one push cycle: fetch the newest papers, store them, notify.

    Environment variables read:
        SERVERCHAN_API_KEY: Required; handed to ``send_wechat_message``.
        QUERY: Search term handed to ``search_arxiv_papers`` (default ``'cs.IR'``).
        THREADS: Used as the maximum number of papers to fetch (default ``1``).

    Raises:
        Exception: If ``SERVERCHAN_API_KEY`` is not set.
        ValueError: If ``THREADS`` is set but is not an integer.
    """
    SERVERCHAN_API_KEY = os.environ.get("SERVERCHAN_API_KEY", None)

    if SERVERCHAN_API_KEY is None:
        raise Exception("未设置SERVERCHAN_API_KEY环境变量")

    search_term = os.environ.get('QUERY', 'cs.IR')
    # Environment values are strings; convert once so the count is a real int.
    max_results = int(os.environ.get('THREADS', 1))

    print('[+] 开始执行每日推送任务....')
    print('[+] 开始检索每日最新论文....')
    papers = search_arxiv_papers(search_term, max_results)

    save_to_local_file(papers)

    print('[+] 开始推送每日最新论文....')

    # These do not depend on the individual paper, so compute them once.
    yesterday = get_yesterday()
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    push_title = f'Arxiv:{search_term}@{today}'
    last_index = len(papers) - 1

    progress = tqdm(papers, total=len(papers), desc="论文推送进度")
    for index, paper in enumerate(progress):
        title = paper['title']
        url = paper['url']
        pub_date = paper['pub_date']
        summary = paper['summary']

        # Papers published yesterday get a marker in front of the title.
        if pub_date == yesterday:
            msg_title = f'[Newest]Title: {title}'
        else:
            msg_title = f'Title: {title}'

        msg_url = f'URL: {url}'
        msg_pub_date = f'Pub Date:{pub_date}'
        msg_summary = f'Summary:\n\n{summary}'

        msg_content = f"[{msg_title}]({url})\n\n{msg_pub_date}\n\n{msg_url}\n\n{msg_summary}\n\n"

        send_wechat_message(push_title, msg_content, SERVERCHAN_API_KEY)

        # Pause between consecutive messages (presumably to stay under the
        # push service's rate limit — confirm); no wait after the last one.
        if index < last_index:
            time.sleep(12)

    print('[+] 每日推送任务执行结束')


# Script entry point: run one push cycle only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    cronjob()



2 changes: 1 addition & 1 deletion citer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_citation(self, doi):
response = requests.get(url)
response.raise_for_status()
data = response.json()['message']
reference_count = data['reference-count']
reference_count = data['is-referenced-by-count']
self.cache[doi] = reference_count
return reference_count
except(requests.exceptions.RequestException, KeyError) as e:
Expand Down
Loading

0 comments on commit 91b8b3f

Please sign in to comment.