-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovieSpider.py
60 lines (39 loc) · 1.07 KB
/
movieSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# import os
import re
import urllib2
class movieSpider:
    """Download one web page and keep its decoded text.

    Attributes:
        baseAddr: URL that ``readPage`` fetches.
        snapshot: Decoded text of the last page fetched successfully
            (the constructor argument until then).
        codec: Name of the charset used to decode ``snapshot``; empty
            until a page has been fetched successfully.
    """

    # Charset assumed when the page declares none, or declares one that
    # Python has no codec for.
    _FALLBACK_CODEC = 'utf-8'

    # Matches both `content="text/html; charset=gb2312"` and the HTML5
    # form `<meta charset="utf-8">` (optional quote before the name).
    _CHARSET_RE = re.compile(
        br'''charset\s*=\s*["']?\s*([A-Za-z0-9._:-]+)''', re.IGNORECASE)

    def __init__(self, baseAddr="https://www.dy2018.com/4", snapshot=""):
        """Store the target URL and an optional initial snapshot."""
        self.baseAddr = baseAddr
        self.snapshot = snapshot
        self.codec = ''

    def __str__(self):
        """Return the fixed label ``"arthur"``.

        This used to be the instance attribute ``self.__str__ = "arthur"``;
        a string there is not callable, so ``str(obj)`` raised TypeError
        on a Python 2 classic class.
        """
        return "arthur"

    @classmethod
    def _detect_charset(cls, html):
        """Return the charset name declared in *html*, or the fallback.

        Args:
            html: Raw page bytes as returned by the HTTP response.

        Returns:
            The first declared charset name as a native ``str``, or
            ``_FALLBACK_CODEC`` when *html* is empty or declares none.
        """
        if not html:
            return cls._FALLBACK_CODEC
        found = cls._CHARSET_RE.search(html)
        if found is None:
            return cls._FALLBACK_CODEC
        # The pattern only admits ASCII characters, so this cannot fail.
        return str(found.group(1).decode('ascii'))

    def readPage(self):
        """Fetch ``self.baseAddr``, decode it, store it and print it.

        On success ``self.codec`` and ``self.snapshot`` are updated
        together. On any error "retrieve content failed!" is printed;
        the method never raises an ``Exception`` and always returns None.
        """
        try:
            print(self.baseAddr)
            response = urllib2.urlopen(self.baseAddr)
            try:
                raw = response.read()
            finally:
                # Release the connection even if read() fails.
                response.close()

            codec = self._detect_charset(raw)
            try:
                text = raw.decode(codec, 'ignore')
            except LookupError:
                # The page named a charset Python does not know.
                codec = self._FALLBACK_CODEC
                text = raw.decode(codec, 'ignore')

            # Assign only after decoding succeeded so a failure never
            # leaves raw bytes in snapshot.
            self.codec = codec
            self.snapshot = text
            print(self.snapshot)
        except Exception:
            # Still best-effort, but no longer a bare except, so Ctrl-C
            # and SystemExit propagate.
            print("retrieve content failed!")
def gettotalPageNum(base_url):
    """Unimplemented stub: ignores ``base_url`` and returns None."""
    return None
def getNextPage(base_url):
    """Unimplemented stub: ignores ``base_url`` and returns None."""
    return None
def getMovieName():
    """Unimplemented stub: takes no arguments and returns None."""
    return None
def getMovieActor():
    """Unimplemented stub: takes no arguments and returns None."""
    return None
def getMovieAdress(url):
    """Unimplemented stub: ignores ``url`` and returns None."""
    return None
def writeMovie():
    """Unimplemented stub: takes no arguments and returns None."""
    return None
def loadMovieArchive():
    """Unimplemented stub: takes no arguments and returns None."""
    return None
def downloadMove():
    """Unimplemented stub: takes no arguments and returns None."""
    return None
# Script entry point: fetch and print the default page once.
if __name__ == "__main__":
    spider = movieSpider()
    spider.readPage()