Skip to content

Commit

Permalink
[tunein] Add new extractor (Closes #4097)
Browse files Browse the repository at this point in the history
  • Loading branch information
Naglis Jonaitis committed Nov 24, 2014
1 parent 00e9d39 commit 2c25a2b
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 0 deletions.
1 change: 1 addition & 0 deletions youtube_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tvigle import TvigleIE
Expand Down
101 changes: 101 additions & 0 deletions youtube_dl/extractor/tunein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError


class TuneInIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?:
tunein\.com/
(?:
radio/.*?-s|
station/.*?StationId\=
)(?P<id>[0-9]+)
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
)
'''

_INFO_DICT = {
'id': '34682',
'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
'ext': 'AAC',
'thumbnail': 're:^https?://.*\.png$',
'location': 'Tacoma, WA',
}
_TESTS = [
{
'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
{ # test redirection
'url': 'http://tun.in/ser7s',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
redirect_id = mobj.group('redirect_id')
if redirect_id:
# The server doesn't support HEAD requests
urlh = self._request_webpage(
url, redirect_id, note='Downloading redirect page')
url = urlh.geturl()
self.to_screen('Following redirect: %s' % url)
mobj = re.match(self._VALID_URL, url)
station_id = mobj.group('id')

webpage = self._download_webpage(
url, station_id, note='Downloading station webpage')

payload = self._html_search_regex(
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
json_data = json.loads(payload)
station_info = json_data['Station']['broadcast']
title = station_info['Title']
thumbnail = station_info.get('Logo')
location = station_info.get('Location')
streams_url = station_info.get('StreamUrl')
if not streams_url:
raise ExtractorError('No downloadable streams found',
expected=True)
stream_data = self._download_webpage(
streams_url, station_id, note='Downloading stream data')
streams = json.loads(self._search_regex(
r'\((.*)\);', stream_data, 'stream info'))['Streams']

is_live = None
formats = []
for stream in streams:
if stream.get('Type') == 'Live':
is_live = True
formats.append({
'abr': stream.get('Bandwidth'),
'ext': stream.get('MediaType'),
'acodec': stream.get('MediaType'),
'vcodec': 'none',
'url': stream.get('Url'),
# Sometimes streams with the highest quality do not exist
'preference': stream.get('Reliability'),
})
self._sort_formats(formats)

return {
'id': station_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'location': location,
'is_live': is_live,
}

0 comments on commit 2c25a2b

Please sign in to comment.