Skip to content

Commit

Permalink
Merge pull request soimort#1708 from rosynirvana/fix_tudou
Browse files (browse the repository at this point in the history)
fix tudou.py when there is no title
  • Loading branch information
cnbeining authored Apr 20, 2017
2 parents c118ce7 + ccb1bb9 commit b314985
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/you_get/extractors/tudou.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,10 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only =
html = get_html('http://www.tudou.com/programs/view/%s/' % id)

iid = r1(r'iid\s*[:=]\s*(\S+)', html)
title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
try:
title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
except AttributeError:
title = ''
tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)

def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
Expand All @@ -42,13 +45,20 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa
if id:
return tudou_download_by_id(id, title="", info_only=info_only)

html = get_decoded_html(url)
html = get_content(url)

title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
assert title
title = unescape_html(title)
try:
title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
assert title
title = unescape_html(title)
except AttributeError:
title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"')
if title is None:
title = ''

vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
if vcode is None:
vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"')
if vcode:
from .youku import youku_download_by_vid
return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
Expand Down

0 comments on commit b314985

Please sign in to comment.