Skip to content

Commit

Permalink
create function lib
Browse files Browse the repository at this point in the history
  • Loading branch information
pkuimyy committed Feb 7, 2019
1 parent c13d5ab commit 00e700b
Show file tree
Hide file tree
Showing 6 changed files with 1,093 additions and 16 deletions.
Empty file removed log/author_url_set_log.txt
Empty file.
150 changes: 150 additions & 0 deletions old_code/.ipynb_checkpoints/get_doc_biblio-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from pprint import pprint as fprint"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'requests' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-222e90fcddd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdoc_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mbs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined"
]
}
],
"source": [
"doc_url = \"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\n",
"r = requests.get(doc_url)\n",
"bs = BeautifulSoup(r.text)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"《图书情报工作》\n",
"纳米出版及其应用研究进展\n",
"['牛丽慧', '欧石燕']\n",
"[目的 /意义]随着学术期刊文献的大量增长,在传统科学文献出版模式下,科研人员需要花费大量时间从文献中查找、获取和解读所需信息。为了促进科学信息的传播与交流,面向科学文献内容的细粒度语义出版成为一种新趋势。本文介绍语义出版中的一种代表性出版模式\"纳米出版(nanopublication)\",并剖析纳米出版在不同学科领域中应用的可能性及应用特点。[方法 /过程]首先对纳米出版模型进行了介绍,然后通过文献调研对纳米出版的应用现状进行了述评,最后以实例说明纳米出版在不同学科领域中的应用特点。[结果 /结论]研究结果表明:(1)纳米出版目前主要应用于生物医学领域,在计算机和人文科学有少量应用,在其他领域几乎没有什么应用;(2)纳米出版可以扩展到其他学科领域进行应用,但是需要根据学科特征构建符合学科领域特点的纳米出版物。\n",
"南京大学信息管理学院\n",
"['纳米出版', '语义出版', '知识表示']\n",
"2018\n"
]
}
],
"source": [
"#journal\n",
"journal = bs.find(\"b\").text.strip()\n",
"print(journal)\n",
"\n",
"#title\n",
"title = bs.find(\"h1\").text.strip()\n",
"print(title)\n",
"\n",
"#ahthor\n",
"res = bs.find(\"div\",{\"style\":\"text-align:center; width:740px; height:30px;\"})\n",
"# print(res)\n",
"author_list = list()\n",
"for item in res.find_all(\"a\"):\n",
" author_list.append(item.string.strip())\n",
"print(author_list)\n",
"\n",
"#abstract\n",
"abstract = bs.find(\"div\",{\"style\":\"text-align:left;word-break:break-all\"}).font.next_sibling.string.strip()\n",
"print(abstract)\n",
"\n",
"#address\n",
"address = bs.find(\"div\",{\"style\":\"text-align:left;\"}).a.string.strip()\n",
"print(address)\n",
"\n",
"#keywords\n",
"keyword_list = bs.find(\"meta\",{\"name\":\"keywords\"})[\"content\"].split()\n",
"print(keyword_list)\n",
"\n",
"#year\n",
"year = bs.find(\"font\",{\"color\":\"#0080ff\"}).string.strip()[:4]\n",
"print(year)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "write() takes exactly one argument (0 given)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-6-48fe4239b792>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"T1 \"\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"JF \"\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mjournal\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 9\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: write() takes exactly one argument (0 given)"
]
}
],
"source": [
"file_name = \"download_\"+doc_url[31:-4]+\".txt\"\n",
"res_file = open(file_name,\"w\",encoding=\"utf-8\")\n",
"for i,author in enumerate(author_list):\n",
" res_file.write(\"A\"+str(i+1)+\" \"+author+\"\\n\")\n",
"res_file.write(\"AD \"+address+\"\\n\")\n",
"res_file.write(\"T1 \"+title+\"\\n\")\n",
"res_file.write(\"JF \"+journal+\"\\n\")\n",
"res_file.write(\"YR \"+year+\"\\n\")\n",
"res_file.write(\"K1 \"+\";\".join(keyword_list)+\"\\n\")\n",
"res_file.write(\"AB \"+abstract+\"\\n\")\n",
"res_file.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
16 changes: 14 additions & 2 deletions old_code/get_doc_biblio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,21 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'requests' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-222e90fcddd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdoc_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mbs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined"
]
}
],
"source": [
"doc_url = \"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\n",
"r = requests.get(doc_url)\n",
Expand Down
Loading

0 comments on commit 00e700b

Please sign in to comment.