forked from soimort/you-get
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
johnsmith2077
committed
Oct 3, 2020
1 parent
00e2ce3
commit 5c9ec6c
Showing
1 changed file
with
203 additions
and
158 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,168 +1,213 @@ | ||
#!/usr/bin/env python | ||
|
||
__all__ = ['acfun_download'] | ||
|
||
from ..common import * | ||
from ..extractor import VideoExtractor | ||
|
||
class AcFun(VideoExtractor):
    """Extractor for AcFun (acfun.cn) videos and bangumi episodes."""

    name = "AcFun"

    # Quality labels as reported by AcFun's ksPlayJson, best quality first.
    stream_types = [
        {'id': '2160P', 'qualityType': '2160p'},
        {'id': '1080P60', 'qualityType': '1080p60'},
        {'id': '720P60', 'qualityType': '720p60'},
        {'id': '1080P+', 'qualityType': '1080p+'},
        {'id': '1080P', 'qualityType': '1080p'},
        {'id': '720P', 'qualityType': '720p'},
        {'id': '540P', 'qualityType': '540p'},
        {'id': '360P', 'qualityType': '360p'}
    ]

    def prepare(self, **kwargs):
        """Parse ``self.url``, filling in ``self.title`` and ``self.streams``.

        Handles two URL shapes: regular videos (``/v/ac<digits>``) and
        bangumi episodes (``/bangumi/aa<digits>``).
        """
        assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url)

        if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', self.url):
            # Regular video page: video metadata is in an inline
            # ``videoInfo = {...};`` script blob.
            html = get_content(self.url, headers=fake_headers)
            json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
            json_data = json.loads(json_text)
            vid = json_data.get('currentVideoInfo').get('id')
            up = json_data.get('user').get('name')
            self.title = json_data.get('title')
            video_list = json_data.get('videoList')
            if len(video_list) > 1:
                # Multi-part video: append the current part's own title.
                self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
            currentVideoInfo = json_data.get('currentVideoInfo')

        elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url):
            # Bangumi episode page: metadata lives in ``window.pageInfo``.
            html = get_content(self.url, headers=fake_headers)
            tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
            json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
            json_data = json.loads(json_text)
            self.title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
            vid = str(json_data['videoId'])
            up = "acfun"
            currentVideoInfo = json_data.get('currentVideoInfo')

        if 'ksPlayJson' in currentVideoInfo:
            durationMillis = currentVideoInfo['durationMillis']
            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
            for stream in representation:
                m3u8_url = stream["url"]
                # avgBitrate is per-stream; durationMillis is in ms.
                # NOTE(review): units suggest this approximates size in
                # bytes — total_size is advisory only.
                size = durationMillis * stream["avgBitrate"] / 8
                container = 'mp4'
                stream_id = stream["qualityLabel"]
                quality = stream["qualityType"]
                stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality)
                self.streams[stream_id] = stream_data

        # BUG FIX: the original asserted on a loop variable (m3u8_url) that
        # is unbound when no stream was found; assert on the dict instead.
        assert self.title and self.streams
        self.title = unescape_html(self.title)
        self.title = escape_file_path(self.title)
        p_title = r1('active">([^<]+)', html)
        self.title = '%s (%s)' % (self.title, up)
        if p_title:
            self.title = '%s - %s' % (self.title, p_title)
||
from .le import letvcloud_download_by_vu | ||
from .qq import qq_download_by_vid | ||
from .sina import sina_download_by_vid | ||
from .tudou import tudou_download_by_iid | ||
from .youku import youku_download_by_vid | ||
|
||
import json | ||
import re | ||
import base64 | ||
import time | ||
|
||
def get_srt_json(id):
    """Fetch the raw danmaku (comment) JSON for video *id*."""
    return get_content('http://danmu.aixifan.com/V2/%s' % id)
||
def youku_acfun_proxy(vid, sign, ref):
    """Resolve AcFun's youku-cloud player data.

    Returns a dict mapping stream_type -> (urls_or_m3u8, total_size).
    """
    api = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}'
    req_url = api.format(vid, sign, str(int(time.time() * 1000)))
    payload = json.loads(get_content(req_url, headers=dict(referer=ref)))['data']
    # The payload is RC4-encrypted with a static key, then base64-encoded.
    plaintext = rc4(b'8bdc7e1a', base64.b64decode(payload)).decode('utf8')
    youku_json = json.loads(plaintext)

    yk_streams = {}
    for stream in youku_json['stream']:
        tp = stream['stream_type']
        if stream.get('segs'):
            yk_streams[tp] = [seg['url'] for seg in stream['segs']], stream['total_size']
        else:
            # No segment list: fall back to the m3u8 playlist.
            yk_streams[tp] = stream['m3u8'], stream['total_size']

    return yk_streams
||
def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):
    """str, str, str, bool, bool -> None

    Download an AcFun video by vid: call the AcFun API, decide which
    hosting site the video lives on, and delegate the job to that
    site's extractor.
    """
    # First call the main parsing API.
    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers))

    sourceType = info['sourceType']

    # sourceId decides which extractor to use.
    # BUG FIX: the original left sourceId unbound when the key was absent,
    # crashing with NameError below; use .get() so failures are explicit.
    sourceId = info.get('sourceId')
    # danmakuId = info['danmakuId']

    # Call the extractor decided by sourceType.
    if sourceType == 'sina':
        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'youku':
        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif sourceType == 'tudou':
        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'qq':
        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'letv':
        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'zhuzhan':
        # As of Jul 28, 2016 AcFun used embsig anti-hotlinking, so we must
        # pass it; in Mar 2017 a dedicated ``acfun_proxy`` appeared in the
        # youku cloud player. Old embsig code removed.
        url = 'http://www.acfun.cn/v/ac' + vid
        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url)
        preferred = None
        for t in ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd']:
            if yk_streams.get(t):
                preferred = yk_streams[t]
                break
        if preferred is None:
            # BUG FIX: the original fell through with ``preferred`` unbound.
            raise NotImplementedError('No supported stream found for ac' + vid)
        # total_size in the json can be wrong (e.g. 0): sum segment sizes.
        size = 0
        for url in preferred[0]:
            _, _, seg_size = url_info(url)
            size += seg_size
        # Fallback to flvhd is not quite possible otherwise.
        if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]):
            ext = 'flv'
        else:
            ext = 'mp4'
        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)
    else:
        raise NotImplementedError(sourceType)

    if not info_only and not dry_run:
        if not kwargs.get('caption'):
            print('Skipping danmaku.')
            return
        try:
            title = get_filename(title)
            print('Downloading %s ...\n' % (title + '.cmt.json'))
            cmt = get_srt_json(vid)
            with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:
                x.write(cmt)
        except Exception:
            # Danmaku download is best-effort; never fail the video download.
            pass
||
def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video or bangumi episode given its page URL."""
    assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

    def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo):
        # Prefer the direct play list when present; otherwise pick the
        # highest-resolution representation from the ksPlayJson blob.
        if 'playInfos' in currentVideoInfo:
            return currentVideoInfo['playInfos'][0]['playUrls'][0]
        elif 'ksPlayJson' in currentVideoInfo:
            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
            reps = []
            for one in representation:
                reps.append((one['width'] * one['height'], one['url'], one['backupUrl']))
            return max(reps)[1]
        # BUG FIX: ``raise NotImplemented`` raised a TypeError in Python 3
        # (NotImplemented is not an exception); raise the proper type.
        raise NotImplementedError('unknown video info format')

    if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
        html = get_content(url, headers=fake_headers)
        json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
        json_data = json.loads(json_text)
        vid = json_data.get('currentVideoInfo').get('id')
        up = json_data.get('user').get('name')
        title = json_data.get('title')
        video_list = json_data.get('videoList')
        if len(video_list) > 1:
            # Multi-part video: append the current part's own title.
            title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
        currentVideoInfo = json_data.get('currentVideoInfo')
        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
    elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
        html = get_content(url, headers=fake_headers)
        tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
        json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
        json_data = json.loads(json_text)
        title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
        vid = str(json_data['videoId'])
        up = "acfun"

        currentVideoInfo = json_data.get('currentVideoInfo')
        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
    else:
        # BUG FIX: same NotImplemented -> NotImplementedError correction.
        raise NotImplementedError(url)

    assert title and m3u8_url
    title = unescape_html(title)
    title = escape_file_path(title)
    p_title = r1('active">([^<]+)', html)
    title = '%s (%s)' % (title, up)
    if p_title:
        title = '%s - %s' % (title, p_title)

    # Size of an m3u8 stream is unknown up front.
    print_info(site_info, title, 'm3u8', float('inf'))
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
stream_list = representation | ||
|
||
for stream in stream_list: | ||
m3u8_url = stream["url"] | ||
size = durationMillis * stream["avgBitrate"] / 8 | ||
# size = float('inf') | ||
container = 'mp4' | ||
stream_id = stream["qualityLabel"] | ||
quality = stream["qualityType"] | ||
|
||
stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality) | ||
self.streams[stream_id] = stream_data | ||
|
||
assert self.title and m3u8_url | ||
self.title = unescape_html(self.title) | ||
self.title = escape_file_path(self.title) | ||
p_title = r1('active">([^<]+)', html) | ||
self.title = '%s (%s)' % (self.title, up) | ||
if p_title: | ||
self.title = '%s - %s' % (self.title, p_title) | ||
|
||
|
||
def download(self, **kwargs):
    """Print stream info or download the selected stream, then save any
    captions/danmaku/lyrics alongside the video.

    Recognized kwargs: json_output, info_only, stream_id, index,
    output_dir, merge, caption, keep_obj.
    """
    if 'json_output' in kwargs and kwargs['json_output']:
        json_output.output(self)
    elif 'info_only' in kwargs and kwargs['info_only']:
        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Display the requested stream only.
            stream_id = kwargs['stream_id']
            if 'index' not in kwargs:
                self.p(stream_id)
            else:
                self.p_i(stream_id)
        else:
            # Display all available streams.
            if 'index' not in kwargs:
                self.p([])
            else:
                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
                self.p_i(stream_id)

    else:
        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Download the requested stream.
            stream_id = kwargs['stream_id']
        else:
            # Default to the best (first sorted) stream.
            stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']

        if 'index' not in kwargs:
            self.p(stream_id)
        else:
            self.p_i(stream_id)

        if stream_id not in self.streams:
            # BUG FIX: the original guarded the lookups with
            # ``if stream_id in self.streams:`` but then fell through with
            # ``url`` unbound, crashing with NameError; fail cleanly instead.
            log.wtf('[Failed] Invalid video format.')
        url = self.streams[stream_id]['src']
        ext = self.streams[stream_id]['container']

        # ffmpeg remuxes these into an mp4 container.
        if ext == 'm3u8' or ext == 'm4a':
            ext = 'mp4'

        if not url:
            log.wtf('[Failed] Cannot extract video source.')
        # For legacy main(): forward UA/referer when configured.
        # NOTE(review): ``headers`` is built but not passed to
        # download_url_ffmpeg here — kept for parity with the original.
        headers = {}
        if self.ua is not None:
            headers['User-Agent'] = self.ua
        if self.referer is not None:
            headers['Referer'] = self.referer

        download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])

        if 'caption' not in kwargs or not kwargs['caption']:
            print('Skipping captions or danmaku.')
            return

        for lang in self.caption_tracks:
            filename = '%s.%s.srt' % (get_filename(self.title), lang)
            print('Saving %s ... ' % filename, end="", flush=True)
            srt = self.caption_tracks[lang]
            with open(os.path.join(kwargs['output_dir'], filename),
                      'w', encoding='utf-8') as x:
                x.write(srt)
            print('Done.')

        if self.danmaku is not None and not dry_run:
            filename = '{}.cmt.xml'.format(get_filename(self.title))
            print('Downloading {} ...\n'.format(filename))
            with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
                fp.write(self.danmaku)

        if self.lyrics is not None and not dry_run:
            filename = '{}.lrc'.format(get_filename(self.title))
            print('Downloading {} ...\n'.format(filename))
            with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
                fp.write(self.lyrics)

    # For main_dev()
    # download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
    keep_obj = kwargs.get('keep_obj', False)
    if not keep_obj:
        self.__init__()
||
|
||
def acfun_download(self, url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video or bangumi episode given its page URL."""
    assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

    def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo):
        # Prefer the direct play list when present; otherwise pick the
        # highest-resolution representation from the ksPlayJson blob.
        if 'playInfos' in currentVideoInfo:
            return currentVideoInfo['playInfos'][0]['playUrls'][0]
        elif 'ksPlayJson' in currentVideoInfo:
            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
            reps = []
            for one in representation:
                reps.append((one['width'] * one['height'], one['url'], one['backupUrl']))
            return max(reps)[1]

    if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
        html = get_content(url, headers=fake_headers)
        json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
        json_data = json.loads(json_text)
        vid = json_data.get('currentVideoInfo').get('id')
        up = json_data.get('user').get('name')
        title = json_data.get('title')
        video_list = json_data.get('videoList')
        if len(video_list) > 1:
            # Multi-part video: append the current part's own title.
            title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
        currentVideoInfo = json_data.get('currentVideoInfo')
        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
    elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
        html = get_content(url, headers=fake_headers)
        tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
        json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
        json_data = json.loads(json_text)
        title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
        vid = str(json_data['videoId'])
        up = "acfun"

        currentVideoInfo = json_data.get('currentVideoInfo')
        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
    else:
        # BUG FIX: ``raise NotImplemented`` raised a TypeError in Python 3
        # (NotImplemented is not an exception); raise the proper type.
        raise NotImplementedError(url)

    assert title and m3u8_url
    title = unescape_html(title)
    title = escape_file_path(title)
    p_title = r1('active">([^<]+)', html)
    title = '%s (%s)' % (title, up)
    if p_title:
        title = '%s - %s' % (title, p_title)

    # Size of an m3u8 stream is unknown up front.
    print_info(site_info, title, 'm3u8', float('inf'))
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
|
||
# Module-level entry points used by you-get's dispatcher.
site = AcFun()
site_info = "AcFun.cn"
# BUG FIX: the dead assignment ``download = acfun_download`` that was
# immediately overwritten has been removed; the extractor-class path is
# the one actually in effect.
download = site.download_by_url
download_playlist = playlist_not_supported('acfun')