-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
1,093 additions
and
16 deletions.
There are no files selected for viewing
Empty file.
150 changes: 150 additions & 0 deletions
150
old_code/.ipynb_checkpoints/get_doc_biblio-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import requests\n", | ||
"from bs4 import BeautifulSoup\n", | ||
"from pprint import pprint as fprint" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "NameError", | ||
"evalue": "name 'requests' is not defined", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[0;32m<ipython-input-1-222e90fcddd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdoc_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mbs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||
"\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"doc_url = \"http://www.cnki.com.cn/Article/CJFDTOTAL-TSQB201807022.htm\"\n", | ||
"r = requests.get(doc_url)\n", | ||
"bs = BeautifulSoup(r.text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"《图书情报工作》\n", | ||
"纳米出版及其应用研究进展\n", | ||
"['牛丽慧', '欧石燕']\n", | ||
"[目的 /意义]随着学术期刊文献的大量增长,在传统科学文献出版模式下,科研人员需要花费大量时间从文献中查找、获取和解读所需信息。为了促进科学信息的传播与交流,面向科学文献内容的细粒度语义出版成为一种新趋势。本文介绍语义出版中的一种代表性出版模式\"纳米出版(nanopublication)\",并剖析纳米出版在不同学科领域中应用的可能性及应用特点。[方法 /过程]首先对纳米出版模型进行了介绍,然后通过文献调研对纳米出版的应用现状进行了述评,最后以实例说明纳米出版在不同学科领域中的应用特点。[结果 /结论]研究结果表明:(1)纳米出版目前主要应用于生物医学领域,在计算机和人文科学有少量应用,在其他领域几乎没有什么应用;(2)纳米出版可以扩展到其他学科领域进行应用,但是需要根据学科特征构建符合学科领域特点的纳米出版物。\n", | ||
"南京大学信息管理学院\n", | ||
"['纳米出版', '语义出版', '知识表示']\n", | ||
"2018\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"#journal\n", | ||
"journal = bs.find(\"b\").text.strip()\n", | ||
"print(journal)\n", | ||
"\n", | ||
"#title\n", | ||
"title = bs.find(\"h1\").text.strip()\n", | ||
"print(title)\n", | ||
"\n", | ||
"#ahthor\n", | ||
"res = bs.find(\"div\",{\"style\":\"text-align:center; width:740px; height:30px;\"})\n", | ||
"# print(res)\n", | ||
"author_list = list()\n", | ||
"for item in res.find_all(\"a\"):\n", | ||
" author_list.append(item.string.strip())\n", | ||
"print(author_list)\n", | ||
"\n", | ||
"#abstract\n", | ||
"abstract = bs.find(\"div\",{\"style\":\"text-align:left;word-break:break-all\"}).font.next_sibling.string.strip()\n", | ||
"print(abstract)\n", | ||
"\n", | ||
"#address\n", | ||
"address = bs.find(\"div\",{\"style\":\"text-align:left;\"}).a.string.strip()\n", | ||
"print(address)\n", | ||
"\n", | ||
"#keywords\n", | ||
"keyword_list = bs.find(\"meta\",{\"name\":\"keywords\"})[\"content\"].split()\n", | ||
"print(keyword_list)\n", | ||
"\n", | ||
"#year\n", | ||
"year = bs.find(\"font\",{\"color\":\"#0080ff\"}).string.strip()[:4]\n", | ||
"print(year)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "TypeError", | ||
"evalue": "write() takes exactly one argument (0 given)", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[1;32m<ipython-input-6-48fe4239b792>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"T1 \"\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"JF \"\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mjournal\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 9\u001b[0m \u001b[0mres_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | ||
"\u001b[1;31mTypeError\u001b[0m: write() takes exactly one argument (0 given)" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"file_name = \"download_\"+doc_url[31:-4]+\".txt\"\n", | ||
"res_file = open(file_name,\"w\",encoding=\"utf-8\")\n", | ||
"for i,author in enumerate(author_list):\n", | ||
" res_file.write(\"A\"+str(i+1)+\" \"+author+\"\\n\")\n", | ||
"res_file.write(\"AD \"+address+\"\\n\")\n", | ||
"res_file.write(\"T1 \"+title+\"\\n\")\n", | ||
"res_file.write(\"JF \"+journal+\"\\n\")\n", | ||
"res_file.write(\"YR \"+year+\"\\n\")\n", | ||
"res_file.write(\"K1 \"+\";\".join(keyword_list)+\"\\n\")\n", | ||
"res_file.write(\"AB \"+abstract+\"\\n\")\n", | ||
"res_file.close()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.