forked from emijrp/wikidata
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp18.sources.py
117 lines (109 loc) · 4.93 KB
/
p18.sources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2017 emijrp <[email protected]>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import time
import pwb
import pywikibot
from wikidatafun import *
def _filename_pattern(filename):
    """Compile a case-insensitive regex that matches *filename* literally.

    A space in the name matches either a space or an underscore.
    """
    # Escape each piece separately so regex metacharacters that are common
    # in file names ("(", ")", ".", "+", "[", ...) are matched as plain text
    # instead of being interpreted (or raising re.error).
    pieces = (re.escape(piece) for piece in filename.split(' '))
    return re.compile('[_ ]'.join(pieces), re.IGNORECASE)


def _first_wiki_using_image(item, pattern, wikisites, sites):
    """Return ``(wikisite, page)`` for the first wiki whose page matches.

    The wikis are tried in the order given by *wikisites*; only wikis the
    item has a sitelink for are considered, and the linked page must exist,
    not be a redirect, and contain text matching *pattern*. Returns ``None``
    when no wiki qualifies.
    """
    for wikisite in wikisites:
        if wikisite not in item.sitelinks:
            continue
        page = pywikibot.Page(sites[wikisite], item.sitelinks[wikisite])
        if (page.exists() and not page.isRedirectPage()
                and pattern.search(page.text)):
            return wikisite, page
    return None


def main():
    """Add a P143 reference to P18 claims that have no sources yet.

    Runs a SPARQL query for every item that has a P18 (image) statement.
    For each P18 claim without sources, the item's sitelinked Wikipedia
    pages are checked in a preferred order; the first page whose text
    mentions the image file name is recorded as a P143 reference on the
    claim, pointing at the Wikidata item id configured for that wiki.

    ``getURL`` and ``loadSPARQL`` are not defined in this file; presumably
    they come from the ``wikidatafun`` star import -- TODO confirm.
    """
    wiki_codes = (
        'ar', 'ca', 'de', 'en', 'es', 'fa', 'fr', 'it', 'ja',
        'nl', 'no', 'pl', 'pt', 'ru', 'sv', 'uk', 'vi', 'zh',
    )
    # Site objects keyed by '<code>wiki', plus the Wikidata site itself.
    sites = {
        '%swiki' % (code): pywikibot.Site(code, 'wikipedia')
        for code in wiki_codes
    }
    sites['wikidata'] = pywikibot.Site('wikidata', 'wikidata')

    # Wikidata item id used as the P143 target for each wiki.
    importedfroms = {
        'arwiki': 'Q199700',
        'cawiki': 'Q199693',
        'dewiki': 'Q48183',
        'enwiki': 'Q328',
        'eswiki': 'Q8449',
        'fawiki': 'Q48952',
        'frwiki': 'Q8447',
        'itwiki': 'Q11920',
        'jawiki': 'Q177837',
        'nlwiki': 'Q10000',
        'nowiki': 'Q191769',
        'plwiki': 'Q1551807',
        'ptwiki': 'Q11921',
        'ruwiki': 'Q206855',
        'svwiki': 'Q169514',
        'ukwiki': 'Q199698',
        'viwiki': 'Q200180',
        'zhwiki': 'Q30239',
    }
    # Preferred order in which wikis are tried as the "imported from" source.
    wikisites = [
        'enwiki', 'dewiki', 'frwiki', 'itwiki', 'eswiki', 'plwiki',
        'ptwiki', 'nlwiki', 'svwiki', 'ruwiki', 'jawiki', 'viwiki',
        'zhwiki', 'ukwiki', 'cawiki', 'fawiki', 'arwiki', 'nowiki',
    ]

    repo = sites['wikidata'].data_repository()
    # URL-encoded query: SELECT ?item WHERE { ?item wdt:P18 ?image. }
    url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=SELECT%20%3Fitem%0AWHERE%20%7B%0A%09%3Fitem%20wdt%3AP18%20%3Fimage.%0A%7D'
    url = '%s&format=json' % (url)
    sparql = getURL(url=url)
    json1 = loadSPARQL(sparql=sparql)

    # Set to an item id to fast-forward through the results until that item
    # is reached; left empty, every result is processed.
    skip = ''
    for result in json1['results']['bindings']:
        q = result['item']['value'].split('/entity/')[1] if 'item' in result else ''
        print('==', q, '==')
        if skip:
            if skip != q:
                print('Skiping until', skip)
                continue
            skip = ''
        if not q:
            # A binding without an item id cannot be loaded as an ItemPage.
            continue

        item = pywikibot.ItemPage(repo, q)
        try:  # to detect Redirect because .isRedirectPage fails
            item.get()
        except Exception as error:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still stop the bot; the item is skipped as before.
            print('Error while .get()', error)
            continue

        if 'P18' not in item.claims:
            continue

        for itemimage in item.claims['P18']:
            if itemimage.getSources():
                print('Item has sources for P18. Skiping...')
                continue
            print('Item doesnt have sources for P18')

            target = itemimage.getTarget()
            if target is None:
                # No file attached to this claim, so there is nothing to
                # search the articles for.
                continue
            # maxsplit=1 keeps the whole name even if it contains "File:".
            imagefilename = target.title().split('File:', 1)[1]
            pattern = _filename_pattern(imagefilename)

            found = _first_wiki_using_image(item, pattern, wikisites, sites)
            if found is None:
                continue
            wikisite, page = found
            # Print str values directly: under Python 3, .encode() would
            # show bytes reprs (b'...') instead of the readable names.
            print('Image "%s" found in %s "%s"' % (imagefilename, wikisite, page.title()))
            importedfrom = pywikibot.Claim(repo, 'P143')
            importedwp = pywikibot.ItemPage(repo, importedfroms[wikisite])
            importedfrom.setTarget(importedwp)
            itemimage.addSource(importedfrom, summary='BOT - Adding 1 reference: [[Property:P143]]: [[%s]]' % (importedfroms[wikisite]))

    print("Finished successfully")
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()