Skip to content

Commit 9b9525d

Browse files
Ryan Mitchell
authored and committed
Missing urlopen import, fixed formatting
1 parent 49b8515 commit 9b9525d

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

chapter5/6-6DegreesCrawlWiki.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
from bs4 import BeautifulSoup
22
import re
33
import pymysql
4+
from urllib.request import urlopen
45

5-
conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock', user='root', passwd=None, db='mysql', charset='utf8')
6+
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='mysql', charset='utf8')
67
cur = conn.cursor()
78
cur.execute("USE wikipedia")
89

@@ -29,12 +30,12 @@ def getLinks(pageUrl, recursionLevel):
2930
pageId = insertPageIfNotExists(pageUrl)
3031
html = urlopen("http://en.wikipedia.org"+pageUrl)
3132
bsObj = BeautifulSoup(html)
32-
for link in bsObj.findAll("a",
33-
href=re.compile("^(/wiki/)((?!:).)*$")):
34-
insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
33+
for link in bsObj.findAll("a", href=re.compile("^(/wiki/)((?!:).)*$")):
34+
insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
3535
if link.attrs['href'] not in pages:
3636
#We have encountered a new page, add it and search it for links
3737
newPage = link.attrs['href']
38+
print(newPage)
3839
pages.add(newPage)
3940
getLinks(newPage, recursionLevel+1)
4041
getLinks("/wiki/Kevin_Bacon", 0)

0 commit comments

Comments (0)