-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcitation_finder.py
64 lines (60 loc) · 1.99 KB
/
citation_finder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import requests
from bs4 import BeautifulSoup
import pubmed_parser as pp
import pymysql
def get_citations(pmcid):
    """Return the PubMed references linked from a PMC article page.

    Downloads the article page from NCBI and collects the ``href`` of the
    anchor inside every ``<span class="nowrap ref pubmed">`` element, with
    the ``/pubmed/`` path segment removed.

    Args:
        pmcid: PMC identifier as a string *without* the ``PMC`` prefix; the
            prefix is prepended here before building the URL.

    Returns:
        A list of strings, one per reference link found on the page. The
        list is empty when the page contains no such links.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server responds with an HTTP error status.
    """
    pmcid = "PMC" + pmcid
    articles_url = 'http://www.ncbi.nlm.nih.gov/pmc/articles/%s' % pmcid
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/39.0.2171.95 Safari/537.36"
        )
    }
    # Without a timeout a single stalled connection would block the caller
    # forever; requests applies no default timeout.
    page = requests.get(articles_url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page and reporting
    # "no citations" for an article that was never actually fetched.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "lxml")
    pubmed_article_urls = [
        span.a['href']
        for span in soup.find_all("span", {"class": "nowrap ref pubmed"})
        # Skip malformed reference spans rather than losing the whole page
        # to a TypeError/KeyError.
        if span.a is not None and span.a.has_attr('href')
    ]
    return [url.replace(r'/pubmed/', '') for url in pubmed_article_urls]
if __name__ == '__main__':
    # Script entry point: for every PMC id stored in `pubmed_article`, record
    # in the `citations` table (a) the references that article links to and
    # (b) the articles reported as citing it.
    #
    # NOTE(review): database credentials are hard-coded here; they should be
    # moved to configuration / environment variables.
    db = pymysql.connect(host="127.0.0.1", user="root", passwd="dehradun123", db="pubmed")
    try:
        with db.cursor() as curr:
            curr.execute("select pmc from pubmed_article where pmc like 'P%';")
            # Drop the first three characters (the "PMC" prefix) from each id;
            # get_citations() adds the prefix back itself.
            pmc_list = [pm[0][3:] for pm in curr.fetchall()]
            print(pmc_list)
            try:
                # As you can see, there is no foreign key constraint: at this
                # point our db simply isn't big enough to implement that
                # constraint, as it would be violated all the time because we
                # don't have all the papers in our db.
                # `if not exists` makes re-running the script a no-op here
                # instead of relying on a swallowed "table exists" error.
                curr.execute("""create table if not exists citations (
                    citated_by varchar(500),
                    citated_to varchar(500)
                );
                """)
            except pymysql.MySQLError as e:
                print("citations", e)
            # Debug sample; guarded so a small table does not abort the run
            # with an IndexError.
            if len(pmc_list) > 6:
                print(pmc_list[6])
            # Values are bound by the driver rather than interpolated into the
            # SQL text: the ids come from scraped web pages and must not be
            # able to break or inject into the statement.
            insert_sql = "insert into citations (citated_by,citated_to) values (%s,%s)"
            for pmcid in pmc_list:
                # Citations given by this paper.
                print(pmcid)
                try:
                    by_this = get_citations(pmcid)
                    for to_cite in by_this:
                        print("\tTo " + to_cite)
                        curr.execute(insert_sql, (pmcid, to_cite))
                    print(pmcid)
                    db.commit()
                except Exception as e:
                    # Best effort: report the failure and move on to the
                    # next lookup instead of aborting the whole crawl.
                    print("citations to", e)
                # Citations received by this paper.
                try:
                    by_this = pp.parse_citation_web(pmcid, "PMC")['pmc_cited']
                    for to_cite in by_this:
                        print("\tFrom " + to_cite)
                        curr.execute(insert_sql, (to_cite, pmcid))
                    db.commit()
                except Exception as e:
                    print("citations from", e)
    finally:
        # Always release the connection, even if the crawl is interrupted.
        db.close()