|
|
@ -1,6 +1,7 @@ |
|
|
|
# coding: utf-8 |
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
import json |
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
@ -22,66 +23,28 @@ from ..utils import ( |
|
|
|
from ..compat import compat_etree_fromstring |
|
|
|
|
|
|
|
|
|
|
|
class ARDMediathekIE(InfoExtractor):
    """Extractor for classic ARD Mediathek style URLs.

    The URL pattern accepts ardmediathek.de (optionally prefixed with
    ``www.`` or ``classic.``), mediathek.daserste.de,
    mediathek.rbb-online.de and one.ard.de.  URLs that the beta extractor
    claims are explicitly rejected in ``suitable``.
    """

    IE_NAME = 'ARD:mediathek'

    # The ``video_id`` group is either purely numeric or a non-numeric
    # path segment; an optional query string may follow.
    _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'

    _TESTS = [
        {
            # available till 26.07.2022
            'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
            'info_dict': {
                'id': '44726822',
                'ext': 'mp4',
                'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
                'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
                'duration': 1740,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
            'only_matching': True,
        },
        {
            # audio
            'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
            'only_matching': True,
        },
        {
            'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
            'only_matching': True,
        },
        {
            # audio
            'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
            'only_matching': True,
        },
        {
            'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        Anything ``ARDBetaMediathekIE`` accepts is declined here; every
        other URL is decided by the inherited ``suitable`` check.
        """
        if ARDBetaMediathekIE.suitable(url):
            return False
        return super(ARDMediathekIE, cls).suitable(url)
|
|
|
class ARDMediathekBaseIE(InfoExtractor): |
|
|
|
_GEO_COUNTRIES = ['DE'] |
|
|
|
|
|
|
|
def _extract_media_info(self, media_info_url, webpage, video_id):
    """Fetch the media JSON and hand it to ``_parse_media_info``.

    media_info_url -- URL of the media JSON document to download
    webpage        -- page source; only checked for the literal '"fsk"'
    video_id       -- id passed to the download and parse steps

    Returns whatever ``_parse_media_info`` returns.
    """
    media_json = self._download_json(
        media_info_url, video_id, 'Downloading media JSON')
    # Evaluated after the download, exactly like the inline expression
    # it replaces, so the order of side effects is unchanged.
    has_fsk_marker = '"fsk"' in webpage
    return self._parse_media_info(media_json, video_id, has_fsk_marker)
|
|
|
|
|
|
|
def _parse_media_info(self, media_info, video_id, fsk): |
|
|
|
formats = self._extract_formats(media_info, video_id) |
|
|
|
|
|
|
|
if not formats: |
|
|
|
if '"fsk"' in webpage: |
|
|
|
if fsk: |
|
|
|
raise ExtractorError( |
|
|
|
'This video is only available after 20:00', expected=True) |
|
|
|
elif media_info.get('_geoblocked'): |
|
|
|
raise ExtractorError('This video is not available due to geo restriction', expected=True) |
|
|
|
self.raise_geo_restricted( |
|
|
|
'This video is not available due to geoblocking', |
|
|
|
countries=self._GEO_COUNTRIES) |
|
|
|
|
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
duration = int_or_none(media_info.get('_duration')) |
|
|
|
thumbnail = media_info.get('_previewImage') |
|
|
|
is_live = media_info.get('_isLive') is True |
|
|
|
|
|
|
|
subtitles = {} |
|
|
|
subtitle_url = media_info.get('_subtitleUrl') |
|
|
|
if subtitle_url: |
|
|
@ -92,9 +55,9 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
|
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'duration': duration, |
|
|
|
'thumbnail': thumbnail, |
|
|
|
'is_live': is_live, |
|
|
|
'duration': int_or_none(media_info.get('_duration')), |
|
|
|
'thumbnail': media_info.get('_previewImage'), |
|
|
|
'is_live': media_info.get('_isLive') is True, |
|
|
|
'formats': formats, |
|
|
|
'subtitles': subtitles, |
|
|
|
} |
|
|
@ -123,11 +86,11 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
update_url_query(stream_url, { |
|
|
|
'hdcore': '3.1.1', |
|
|
|
'plugin': 'aasp-3.1.1.69.124' |
|
|
|
}), |
|
|
|
video_id, f4m_id='hds', fatal=False)) |
|
|
|
}), video_id, f4m_id='hds', fatal=False)) |
|
|
|
elif ext == 'm3u8': |
|
|
|
formats.extend(self._extract_m3u8_formats( |
|
|
|
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) |
|
|
|
stream_url, video_id, 'mp4', 'm3u8_native', |
|
|
|
m3u8_id='hls', fatal=False)) |
|
|
|
else: |
|
|
|
if server and server.startswith('rtmp'): |
|
|
|
f = { |
|
|
@ -140,7 +103,9 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
'url': stream_url, |
|
|
|
'format_id': 'a%s-%s-%s' % (num, ext, quality) |
|
|
|
} |
|
|
|
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url) |
|
|
|
m = re.search( |
|
|
|
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', |
|
|
|
stream_url) |
|
|
|
if m: |
|
|
|
f.update({ |
|
|
|
'width': int(m.group('width')), |
|
|
@ -151,6 +116,48 @@ class ARDMediathekIE(InfoExtractor): |
|
|
|
formats.append(f) |
|
|
|
return formats |
|
|
|
|
|
|
|
|
|
|
|
class ARDMediathekIE(ARDMediathekBaseIE): |
|
|
|
IE_NAME = 'ARD:mediathek' |
|
|
|
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' |
|
|
|
|
|
|
|
_TESTS = [{ |
|
|
|
# available till 26.07.2022 |
|
|
|
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822', |
|
|
|
'info_dict': { |
|
|
|
'id': '44726822', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?', |
|
|
|
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5', |
|
|
|
'duration': 1740, |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
# m3u8 download |
|
|
|
'skip_download': True, |
|
|
|
} |
|
|
|
}, { |
|
|
|
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', |
|
|
|
'only_matching': True, |
|
|
|
}, { |
|
|
|
# audio |
|
|
|
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', |
|
|
|
'only_matching': True, |
|
|
|
}, { |
|
|
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', |
|
|
|
'only_matching': True, |
|
|
|
}, { |
|
|
|
# audio |
|
|
|
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', |
|
|
|
'only_matching': True, |
|
|
|
}, { |
|
|
|
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', |
|
|
|
'only_matching': True, |
|
|
|
}] |
|
|
|
|
|
|
|
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Anything ``ARDBetaMediathekIE`` accepts is declined here; every
    other URL is decided by the inherited ``suitable`` check.
    """
    if ARDBetaMediathekIE.suitable(url):
        return False
    return super(ARDMediathekIE, cls).suitable(url)
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# determine video id from url |
|
|
|
m = re.match(self._VALID_URL, url) |
|
|
@ -302,19 +309,20 @@ class ARDIE(InfoExtractor): |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
class ARDBetaMediathekIE(InfoExtractor): |
|
|
|
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?' |
|
|
|
class ARDBetaMediathekIE(ARDMediathekBaseIE): |
|
|
|
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/(?P<client>[^/]+)/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?' |
|
|
|
_TESTS = [{ |
|
|
|
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita', |
|
|
|
'md5': '2d02d996156ea3c397cfc5036b5d7f8f', |
|
|
|
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f', |
|
|
|
'info_dict': { |
|
|
|
'display_id': 'die-robuste-roswita', |
|
|
|
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', |
|
|
|
'title': 'Tatort: Die robuste Roswita', |
|
|
|
'id': '70153354', |
|
|
|
'title': 'Die robuste Roswita', |
|
|
|
'description': r're:^Der Mord.*trüber ist als die Ilm.', |
|
|
|
'duration': 5316, |
|
|
|
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard', |
|
|
|
'upload_date': '20180826', |
|
|
|
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard', |
|
|
|
'timestamp': 1577047500, |
|
|
|
'upload_date': '20191222', |
|
|
|
'ext': 'mp4', |
|
|
|
}, |
|
|
|
}, { |
|
|
@ -330,71 +338,68 @@ class ARDBetaMediathekIE(InfoExtractor): |
|
|
|
video_id = mobj.group('video_id') |
|
|
|
display_id = mobj.group('display_id') or video_id |
|
|
|
|
|
|
|
webpage = self._download_webpage(url, display_id) |
|
|
|
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json') |
|
|
|
data = self._parse_json(data_json, display_id) |
|
|
|
|
|
|
|
res = { |
|
|
|
'id': video_id, |
|
|
|
'display_id': display_id, |
|
|
|
player_page = self._download_json( |
|
|
|
'https://api.ardmediathek.de/public-gateway', |
|
|
|
display_id, data=json.dumps({ |
|
|
|
'query': '''{ |
|
|
|
playerPage(client:"%s", clipId: "%s") { |
|
|
|
blockedByFsk |
|
|
|
broadcastedOn |
|
|
|
maturityContentRating |
|
|
|
mediaCollection { |
|
|
|
_duration |
|
|
|
_geoblocked |
|
|
|
_isLive |
|
|
|
_mediaArray { |
|
|
|
_mediaStreamArray { |
|
|
|
_quality |
|
|
|
_server |
|
|
|
_stream |
|
|
|
} |
|
|
|
formats = [] |
|
|
|
subtitles = {} |
|
|
|
geoblocked = False |
|
|
|
for widget in data.values(): |
|
|
|
if widget.get('_geoblocked') is True: |
|
|
|
geoblocked = True |
|
|
|
if '_duration' in widget: |
|
|
|
res['duration'] = int_or_none(widget['_duration']) |
|
|
|
if 'clipTitle' in widget: |
|
|
|
res['title'] = widget['clipTitle'] |
|
|
|
if '_previewImage' in widget: |
|
|
|
res['thumbnail'] = widget['_previewImage'] |
|
|
|
if 'broadcastedOn' in widget: |
|
|
|
res['timestamp'] = unified_timestamp(widget['broadcastedOn']) |
|
|
|
if 'synopsis' in widget: |
|
|
|
res['description'] = widget['synopsis'] |
|
|
|
subtitle_url = url_or_none(widget.get('_subtitleUrl')) |
|
|
|
if subtitle_url: |
|
|
|
subtitles.setdefault('de', []).append({ |
|
|
|
'ext': 'ttml', |
|
|
|
'url': subtitle_url, |
|
|
|
}) |
|
|
|
if '_quality' in widget: |
|
|
|
format_url = url_or_none(try_get( |
|
|
|
widget, lambda x: x['_stream']['json'][0])) |
|
|
|
if not format_url: |
|
|
|
continue |
|
|
|
ext = determine_ext(format_url) |
|
|
|
if ext == 'f4m': |
|
|
|
formats.extend(self._extract_f4m_formats( |
|
|
|
format_url + '?hdcore=3.11.0', |
|
|
|
video_id, f4m_id='hds', fatal=False)) |
|
|
|
elif ext == 'm3u8': |
|
|
|
formats.extend(self._extract_m3u8_formats( |
|
|
|
format_url, video_id, 'mp4', m3u8_id='hls', |
|
|
|
fatal=False)) |
|
|
|
else: |
|
|
|
# HTTP formats are not available when geoblocked is True, |
|
|
|
# other formats are fine though |
|
|
|
if geoblocked: |
|
|
|
continue |
|
|
|
quality = str_or_none(widget.get('_quality')) |
|
|
|
formats.append({ |
|
|
|
'format_id': ('http-' + quality) if quality else 'http', |
|
|
|
'url': format_url, |
|
|
|
'preference': 10, # Plain HTTP, that's nice |
|
|
|
}) |
|
|
|
|
|
|
|
if not formats and geoblocked: |
|
|
|
self.raise_geo_restricted( |
|
|
|
msg='This video is not available due to geoblocking', |
|
|
|
countries=['DE']) |
|
|
|
|
|
|
|
self._sort_formats(formats) |
|
|
|
res.update({ |
|
|
|
'subtitles': subtitles, |
|
|
|
'formats': formats, |
|
|
|
} |
|
|
|
_previewImage |
|
|
|
_subtitleUrl |
|
|
|
_type |
|
|
|
} |
|
|
|
show { |
|
|
|
title |
|
|
|
} |
|
|
|
synopsis |
|
|
|
title |
|
|
|
tracking { |
|
|
|
atiCustomVars { |
|
|
|
contentId |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
}''' % (mobj.group('client'), video_id), |
|
|
|
}).encode(), headers={ |
|
|
|
'Content-Type': 'application/json' |
|
|
|
})['data']['playerPage'] |
|
|
|
title = player_page['title'] |
|
|
|
content_id = str_or_none(try_get( |
|
|
|
player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) |
|
|
|
media_collection = player_page.get('mediaCollection') or {} |
|
|
|
if not media_collection and content_id: |
|
|
|
media_collection = self._download_json( |
|
|
|
'https://www.ardmediathek.de/play/media/' + content_id, |
|
|
|
content_id, fatal=False) or {} |
|
|
|
info = self._parse_media_info( |
|
|
|
media_collection, content_id or video_id, |
|
|
|
player_page.get('blockedByFsk')) |
|
|
|
age_limit = None |
|
|
|
description = player_page.get('synopsis') |
|
|
|
maturity_content_rating = player_page.get('maturityContentRating') |
|
|
|
if maturity_content_rating: |
|
|
|
age_limit = int_or_none(maturity_content_rating.lstrip('FSK')) |
|
|
|
if not age_limit: |
|
|
|
age_limit = int_or_none(self._search_regex(r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None)) |
|
|
|
info.update({ |
|
|
|
'age_limit': age_limit, |
|
|
|
'display_id': display_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|
'timestamp': unified_timestamp(player_page.get('broadcastedOn')), |
|
|
|
'series': try_get(player_page, lambda x: x['show']['title']), |
|
|
|
}) |
|
|
|
|
|
|
|
return res |
|
|
|
return info |