-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbs4_impl.py
38 lines (27 loc) · 1023 Bytes
/
bs4_impl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from bs4 import BeautifulSoup
from bs4.diagnose import diagnose
import re
import json
import settings
"""
path: soup.contents[5].contents[5].text
"""
def get_profile_json(html_source):
    """Extract the ``window._sharedData`` JSON payload from a page.

    Parses *html_source*, finds the ``text/javascript`` element whose text
    mentions ``window._sharedData``, removes the assignment prefix and any
    trailing semicolon, and decodes what is left as JSON.

    Args:
        html_source: HTML markup to search.

    Returns:
        The decoded JSON value, or an empty dict when the script element is
        absent or its payload cannot be decoded.
    """
    soup = BeautifulSoup(
        html_source, settings.config.get('parser', 'html.parser')
    )
    if settings.config.get('debug'):
        diagnose(html_source)

    content = soup.find(
        type='text/javascript', string=re.compile('window._sharedData')
    )
    # find() yields None when nothing matches; report "no data" instead of
    # failing with AttributeError so the caller can move on to the next item.
    if content is None or content.string is None:
        return {}

    # Strip only real trailing semicolons; blindly dropping the last character
    # would corrupt a payload that is not terminated by ';'.
    shared_data = (
        content.string.strip()
        .replace('window._sharedData = ', '')
        .rstrip(';')
    )
    try:
        return json.loads(shared_data)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; malformed or pathologically
        # nested payloads are treated as "no data" (best-effort contract).
        return {}
def get_pic_url(html_source):
    """Return the ``content`` attribute of the first Instagram ``<meta>`` tag.

    Parses *html_source* and looks for a ``<meta>`` element whose ``content``
    attribute matches ``https://instagram``.

    Args:
        html_source: HTML markup to search.

    Returns:
        The value of the matching tag's ``content`` attribute, or ``None``
        when no such tag exists.
    """
    soup = BeautifulSoup(
        html_source, settings.config.get('parser', 'html.parser')
    )
    if settings.config.get('debug'):
        diagnose(html_source)

    content = soup.find('meta', content=re.compile("https://instagram"))
    # find() yields None when nothing matches; avoid AttributeError on .attrs.
    if content is None:
        return None
    return content.attrs.get('content')