|
|
@ -35,6 +35,87 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
'skip': 'Blocked outside of Germany', |
|
|
|
}] |
|
|
|
|
|
|
|
def _extract_media_info(self, media_info_url, webpage, video_id):
    """Download the media JSON and build a partial info dict from it.

    The JSON found at ``media_info_url`` is handed to
    ``_extract_formats``; the remaining fields are read straight from
    the JSON document.

    Returns a dict with the keys ``id``, ``duration``, ``thumbnail``,
    ``formats`` and ``subtitles``.

    Raises ExtractorError (expected) when no formats were found and
    either the webpage contains the ``"fsk"`` marker or the JSON has a
    truthy ``_geoblocked`` entry.
    """
    metadata = self._download_json(
        media_info_url, video_id, 'Downloading media JSON')

    available_formats = self._extract_formats(metadata, video_id)

    # With nothing to play, try to report a reason the user can act on.
    if not available_formats:
        if '"fsk"' in webpage:
            raise ExtractorError(
                'This video is only available after 20:00', expected=True)
        if metadata.get('_geoblocked'):
            raise ExtractorError(
                'This video is not available due to geo restriction',
                expected=True)

    self._sort_formats(available_formats)

    result = {
        'id': video_id,
        'duration': int_or_none(metadata.get('_duration')),
        'thumbnail': metadata.get('_previewImage'),
        'formats': available_formats,
        'subtitles': {},
    }

    # A subtitle URL, when present, is exposed as a single German srt track.
    caption_url = metadata.get('_subtitleUrl')
    if caption_url:
        result['subtitles']['de'] = [{
            'ext': 'srt',
            'url': caption_url,
        }]

    return result
|
|
|
|
|
|
|
def _extract_formats(self, media_info, video_id):
    """Collect format dicts from the ``_mediaArray`` of a media JSON.

    Each entry of every ``_mediaStreamArray`` may carry one stream URL
    or a list of them in ``_stream``. Per URL:

    * ``f4m`` manifests are expanded via ``_extract_f4m_formats``;
    * ``m3u8`` manifests are expanded via ``_extract_m3u8_formats``;
    * otherwise, if the stream has a ``_server`` starting with
      ``rtmp``, the server is used as URL and the stream as play path;
    * otherwise, URLs starting with ``http`` are used directly;
    * anything else is ignored.

    Returns the list of format dicts (unsorted).
    """
    media_type = media_info.get('_type')
    media_entries = media_info.get('_mediaArray', [])
    collected = []

    for media_index, media_entry in enumerate(media_entries):
        for stream_entry in media_entry.get('_mediaStreamArray', []):
            candidates = stream_entry.get('_stream')
            if not candidates:
                continue
            # Normalise the single-URL case to a one-element list.
            if not isinstance(candidates, list):
                candidates = [candidates]

            quality = stream_entry.get('_quality')
            server = stream_entry.get('_server')

            for candidate in candidates:
                ext = determine_ext(candidate)

                # Manifest formats expand into several formats at once.
                if ext == 'f4m':
                    collected.extend(self._extract_f4m_formats(
                        candidate + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
                        video_id, preference=-1, f4m_id='hds'))
                    continue
                if ext == 'm3u8':
                    collected.extend(self._extract_m3u8_formats(
                        candidate, video_id, 'mp4', preference=1,
                        m3u8_id='hls'))
                    continue

                # Direct streams: an rtmp server takes precedence over http.
                if server and server.startswith('rtmp'):
                    entry = {
                        'url': server,
                        'play_path': candidate,
                        'format_id': 'a%s-rtmp-%s' % (media_index, quality),
                    }
                elif candidate.startswith('http'):
                    entry = {
                        'url': candidate,
                        'format_id': 'a%s-%s-%s' % (media_index, ext, quality)
                    }
                else:
                    continue

                # Pick up the resolution when the file name ends in
                # "_<width>x<height>.mp4".
                size = re.search(
                    r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', candidate)
                if size:
                    entry['width'] = int(size.group('width'))
                    entry['height'] = int(size.group('height'))

                if media_type == 'audio':
                    entry['vcodec'] = 'none'

                collected.append(entry)

    return collected
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# determine video id from url |
|
|
|
m = re.match(self._VALID_URL, url) |
|
|
@ -92,46 +173,22 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
'format_id': fid, |
|
|
|
'url': furl, |
|
|
|
}) |
|
|
|
self._sort_formats(formats) |
|
|
|
info = { |
|
|
|
'formats': formats, |
|
|
|
} |
|
|
|
else: # request JSON file |
|
|
|
media_info = self._download_json( |
|
|
|
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) |
|
|
|
# The second element of the _mediaArray contains the standard http urls |
|
|
|
streams = media_info['_mediaArray'][1]['_mediaStreamArray'] |
|
|
|
if not streams: |
|
|
|
if '"fsk"' in webpage: |
|
|
|
raise ExtractorError('This video is only available after 20:00') |
|
|
|
|
|
|
|
formats = [] |
|
|
|
for s in streams: |
|
|
|
if type(s['_stream']) == list: |
|
|
|
for index, url in enumerate(s['_stream'][::-1]): |
|
|
|
quality = s['_quality'] + index |
|
|
|
formats.append({ |
|
|
|
'quality': quality, |
|
|
|
'url': url, |
|
|
|
'format_id': '%s-%s' % (determine_ext(url), quality) |
|
|
|
}) |
|
|
|
continue |
|
|
|
|
|
|
|
format = { |
|
|
|
'quality': s['_quality'], |
|
|
|
'url': s['_stream'], |
|
|
|
} |
|
|
|
|
|
|
|
format['format_id'] = '%s-%s' % ( |
|
|
|
determine_ext(format['url']), format['quality']) |
|
|
|
info = self._extract_media_info( |
|
|
|
'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) |
|
|
|
|
|
|
|
formats.append(format) |
|
|
|
|
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
return { |
|
|
|
info.update({ |
|
|
|
'id': video_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|
'formats': formats, |
|
|
|
'thumbnail': thumbnail, |
|
|
|
} |
|
|
|
}) |
|
|
|
|
|
|
|
return info |
|
|
|
|
|
|
|
|
|
|
|
class ARDIE(InfoExtractor): |
|
|
|