forked from moyy996/AVDC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathavsox.py
158 lines (129 loc) · 5 KB
/
avsox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import json
import re
from bs4 import BeautifulSoup
from lxml import etree
from Function.getHtml import get_html
def getActorPhoto(htmlcode):
    """Map each actor's name to their avatar image URL.

    Scans every element with class ``avatar-box`` on the detail page and
    returns ``{actor_name: image_src}``.
    """
    soup = BeautifulSoup(htmlcode, 'lxml')
    photos = {}
    for box in soup.find_all(attrs={'class': 'avatar-box'}):
        # <span> holds the actor's display name, <img src> the avatar URL.
        photos[box.span.get_text()] = box.img['src']
    return photos
def getTitle(a):
    """Extract the movie title from the detail-page HTML.

    Returns '' when the page cannot be parsed or the title node is
    missing.  '/' is stripped from the title because the result is later
    used in file names.
    """
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        # str(list).strip(" ['']") unwraps the one-element xpath result
        # and collapses to '' when nothing matched.
        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  # [0]
        return result.replace('/', '')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; any parse failure still yields ''.
        return ''
def getActor(a):
    """Return the list of actor names shown in the page's avatar boxes."""
    soup = BeautifulSoup(a, 'lxml')
    boxes = soup.find_all(attrs={'class': 'avatar-box'})
    return [box.span.get_text() for box in boxes]
def getStudio(a):
    """Extract the studio (制作商) from the info table.

    Multiple studio links are joined with a single space.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    names = tree.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')
    # str(list).strip(...) unwraps the result; the replace joins a
    # multi-element list ("'a', 'b'") with spaces.
    return str(names).strip(" ['']").replace("', '", ' ')
def getRuntime(a):
    """Extract the runtime in minutes; the '分钟' (minutes) unit is stripped."""
    tree = etree.fromstring(a, etree.HTMLParser())
    texts = tree.xpath('//span[contains(text(),"长度:")]/../text()')
    return str(texts).strip(" ['分钟']")
def getSeries(a):
    """Extract the series (系列) name from the info table."""
    tree = etree.fromstring(a, etree.HTMLParser())
    nodes = tree.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')
    return str(nodes).strip(" ['']")
def getNum(a):
    """Extract the movie identification code (识别码) from the info table."""
    tree = etree.fromstring(a, etree.HTMLParser())
    nodes = tree.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')
    return str(nodes).strip(" ['']")
def getYear(release):
    """Return the 4-digit year found in a release-date string.

    Falls back to returning *release* unchanged when no 4-digit run is
    present, or when the input is not a string at all (e.g. None).
    """
    try:
        # Raw string for the regex (the original '\d' relied on Python
        # tolerating the unknown escape).
        return re.search(r'\d{4}', release).group()
    except (AttributeError, TypeError):
        # AttributeError: no match (search returned None).
        # TypeError: release is not str/bytes.
        # Narrowed from a bare `except:`.
        return release
def getRelease(a):
    """Extract the release date (发行时间) from the info table."""
    tree = etree.fromstring(a, etree.HTMLParser())
    nodes = tree.xpath('//span[contains(text(),"发行时间:")]/../text()')
    return str(nodes).strip(" ['']")
def getCover(htmlcode):
    """Extract the full-size cover image URL from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    src = tree.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')
    return str(src).strip(" ['']")
def getCover_small(htmlcode, count):
    """Return the thumbnail URL of the *count*-th search result (1-based).

    Raises IndexError if no such result exists, same as the original.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    query = ("//div[@id='waterfall']/div[" + str(count)
             + "]/a/div[@class='photo-frame']/img/@src")
    return tree.xpath(query)[0]
def getTag(a):
    """Return all genre tag strings from the page.

    NOTE: the original comment said "获取演员" (get actors), but the
    selector actually matches class 'genre' — these are tags, not actors.
    """
    soup = BeautifulSoup(a, 'lxml')
    return [node.get_text() for node in soup.find_all(attrs={'class': 'genre'})]
def getUrl(number):
    """Search avsox for *number* and locate the exact matching result.

    Returns (index, search_page_html, detail_url) where *index* is the
    1-based position of the matching entry in the result waterfall.
    When no result matches (or the search returns nothing), returns
    (0, search_page_html, '').

    Bug fix: the original returned a 2-tuple ``(response, '')`` on the
    not-found path while ``main()`` unpacks three values, so a missing
    movie raised ``ValueError`` instead of producing the intended
    "Movie Data not found" message.
    """
    response = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(response, etree.HTMLParser())
    url_list = html.xpath('//*[@id="waterfall"]/div/a/@href')
    for i in range(1, len(url_list) + 1):
        # Each result card shows the movie code in its first <date> tag;
        # compare case-insensitively against the requested number.
        number_get = str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip(" ['']")
        if number.upper() == number_get.upper():
            return i, response, str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/@href')).strip(" ['']")
    # Not found: keep the 3-tuple shape so the caller's unpacking works.
    return 0, response, ''
def main(number):
    """Scrape avsox metadata for movie *number* and return it as JSON.

    On proxy/network failure returns ``{'title': '', 'website': 'timeout'}``;
    on any other error (including "not found") returns
    ``{'title': '', 'website': ''}``.
    """
    try:
        count, response, url = getUrl(number)
        if str(response) == 'ProxyError':
            raise TimeoutError
        if url == '':
            raise Exception('Movie Data not found in avsox!')
        web = get_html(url)
        soup = BeautifulSoup(web, 'lxml')
        # The 'row movie' div holds the info table parsed by the get* helpers.
        info = str(soup.find(attrs={'class': 'row movie'}))
        # Use the canonical code from the page from here on.
        number = getNum(web)
        dic = {
            'actor': getActor(web),
            # Drop the leading ID from the title. The original used
            # .strip(number), which strips *characters* of `number` from
            # both ends and corrupted titles; replace() removes the
            # actual substring instead.
            'title': getTitle(web).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(info),
            'runtime': getRuntime(info),
            'release': getRelease(info),
            'number': getNum(info),
            'tag': getTag(web),
            'series': getSeries(info),
            'year': getYear(getRelease(info)),
            'actor_photo': getActorPhoto(web),
            'cover': getCover(web),
            'cover_small': getCover_small(response, count),
            'imagecut': 3,
            'director': '',
            'publisher': '',
            'outline': '',
            'score': '',
            'website': url,
            'source': 'avsox.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in avsox.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
# print(main('051119-917'))
# print(main('032620_001'))