-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest3.py
35 lines (29 loc) · 1.03 KB
/
test3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding:utf-8 -*-
# Platform: win7
# Python: 3.5
# Author: wucl([email protected])
# Program: test3
# History: 2016.7.26
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Fetch the 19lou landing page and print, for every <li> element, the
# ``title`` and ``href`` of the first <a> found inside it.

# Read the body inside a ``with`` block so the HTTP response is closed even
# if reading fails.
with urlopen("http://www.19lou.com/?from=hangzhou") as html:
    soup = BeautifulSoup(html.read(), 'html.parser')

for item in soup.find_all('li'):
    link = item.find("a")
    if link is None:
        # This <li> contains no anchor at all; nothing to report.
        continue

    title = link.get('title')
    href = link.get('href')
    if title is None or href is None:
        # Only anchors carrying both attributes are printed.
        continue

    try:
        print(title, href)
    except UnicodeEncodeError:
        # The console encoding may be unable to represent some titles.
        # The original bare ``except`` silently skipped these as well, so
        # stay best-effort here instead of aborting the whole run.
        continue

# --- Earlier experiments, disabled and kept for reference -------------------
#for child in soup.find("table", {"id":"giftList"}).children:
#    print(child.find("td"))
#for sibling in soup.find("table", {"id":"giftList"}).tr.next_siblings:
#    print(sibling) #.find("span").get_text())
#print(soup.tbody)
#for table in soup.findAll('table').findAll('img'):
#    print(table)
#    print('*'*100)
#print(item.attrs)
#print(item.parent.previous_sibling.get_text())
#print(soup.find("img", {"src":re.compile(r"../img/gifts/img\d.jpg")})) #.parent.previous_sibling.get_text())
#print(soup.findAll(lambda tag: len(tag.attrs) == 2))