You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

319 lines
12 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. determine_ext,
  8. int_or_none,
  9. NO_DEFAULT,
  10. orderedSet,
  11. parse_codecs,
  12. qualities,
  13. try_get,
  14. unified_timestamp,
  15. update_url_query,
  16. url_or_none,
  17. urljoin,
  18. )
  19. class ZDFBaseIE(InfoExtractor):
  20. def _call_api(self, url, player, referrer, video_id, item):
  21. return self._download_json(
  22. url, video_id, 'Downloading JSON %s' % item,
  23. headers={
  24. 'Referer': referrer,
  25. 'Api-Auth': 'Bearer %s' % player['apiToken'],
  26. })
  27. def _extract_player(self, webpage, video_id, fatal=True):
  28. return self._parse_json(
  29. self._search_regex(
  30. r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
  31. 'player JSON', default='{}' if not fatal else NO_DEFAULT,
  32. group='json'),
  33. video_id)
  34. class ZDFIE(ZDFBaseIE):
  35. _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
  36. _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
  37. _TESTS = [{
  38. 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
  39. 'info_dict': {
  40. 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
  41. 'ext': 'mp4',
  42. 'title': 'Die Magie der Farben (2/2)',
  43. 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
  44. 'duration': 2615,
  45. 'timestamp': 1465021200,
  46. 'upload_date': '20160604',
  47. },
  48. }, {
  49. 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
  50. 'only_matching': True,
  51. }, {
  52. 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
  53. 'only_matching': True,
  54. }, {
  55. 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
  56. 'only_matching': True,
  57. }]
  58. @staticmethod
  59. def _extract_subtitles(src):
  60. subtitles = {}
  61. for caption in try_get(src, lambda x: x['captions'], list) or []:
  62. subtitle_url = url_or_none(caption.get('uri'))
  63. if subtitle_url:
  64. lang = caption.get('language', 'deu')
  65. subtitles.setdefault(lang, []).append({
  66. 'url': subtitle_url,
  67. })
  68. return subtitles
  69. def _extract_format(self, video_id, formats, format_urls, meta):
  70. format_url = url_or_none(meta.get('url'))
  71. if not format_url:
  72. return
  73. if format_url in format_urls:
  74. return
  75. format_urls.add(format_url)
  76. mime_type = meta.get('mimeType')
  77. ext = determine_ext(format_url)
  78. if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
  79. formats.extend(self._extract_m3u8_formats(
  80. format_url, video_id, 'mp4', m3u8_id='hls',
  81. entry_protocol='m3u8_native', fatal=False))
  82. elif mime_type == 'application/f4m+xml' or ext == 'f4m':
  83. formats.extend(self._extract_f4m_formats(
  84. update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
  85. else:
  86. f = parse_codecs(meta.get('mimeCodec'))
  87. format_id = ['http']
  88. for p in (meta.get('type'), meta.get('quality')):
  89. if p and isinstance(p, compat_str):
  90. format_id.append(p)
  91. f.update({
  92. 'url': format_url,
  93. 'format_id': '-'.join(format_id),
  94. 'format_note': meta.get('quality'),
  95. 'language': meta.get('language'),
  96. 'quality': qualities(self._QUALITIES)(meta.get('quality')),
  97. 'preference': -10,
  98. })
  99. formats.append(f)
  100. def _extract_entry(self, url, player, content, video_id):
  101. title = content.get('title') or content['teaserHeadline']
  102. t = content['mainVideoContent']['http://zdf.de/rels/target']
  103. ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
  104. if not ptmd_path:
  105. ptmd_path = t[
  106. 'http://zdf.de/rels/streams/ptmd-template'].replace(
  107. '{playerId}', 'portal')
  108. ptmd = self._call_api(
  109. urljoin(url, ptmd_path), player, url, video_id, 'metadata')
  110. formats = []
  111. track_uris = set()
  112. for p in ptmd['priorityList']:
  113. formitaeten = p.get('formitaeten')
  114. if not isinstance(formitaeten, list):
  115. continue
  116. for f in formitaeten:
  117. f_qualities = f.get('qualities')
  118. if not isinstance(f_qualities, list):
  119. continue
  120. for quality in f_qualities:
  121. tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
  122. if not tracks:
  123. continue
  124. for track in tracks:
  125. self._extract_format(
  126. video_id, formats, track_uris, {
  127. 'url': track.get('uri'),
  128. 'type': f.get('type'),
  129. 'mimeType': f.get('mimeType'),
  130. 'quality': quality.get('quality'),
  131. 'language': track.get('language'),
  132. })
  133. self._sort_formats(formats)
  134. thumbnails = []
  135. layouts = try_get(
  136. content, lambda x: x['teaserImageRef']['layouts'], dict)
  137. if layouts:
  138. for layout_key, layout_url in layouts.items():
  139. layout_url = url_or_none(layout_url)
  140. if not layout_url:
  141. continue
  142. thumbnail = {
  143. 'url': layout_url,
  144. 'format_id': layout_key,
  145. }
  146. mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
  147. if mobj:
  148. thumbnail.update({
  149. 'width': int(mobj.group('width')),
  150. 'height': int(mobj.group('height')),
  151. })
  152. thumbnails.append(thumbnail)
  153. return {
  154. 'id': video_id,
  155. 'title': title,
  156. 'description': content.get('leadParagraph') or content.get('teasertext'),
  157. 'duration': int_or_none(t.get('duration')),
  158. 'timestamp': unified_timestamp(content.get('editorialDate')),
  159. 'thumbnails': thumbnails,
  160. 'subtitles': self._extract_subtitles(ptmd),
  161. 'formats': formats,
  162. }
  163. def _extract_regular(self, url, player, video_id):
  164. content = self._call_api(
  165. player['content'], player, url, video_id, 'content')
  166. return self._extract_entry(player['content'], player, content, video_id)
  167. def _extract_mobile(self, video_id):
  168. document = self._download_json(
  169. 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
  170. video_id)['document']
  171. title = document['titel']
  172. formats = []
  173. format_urls = set()
  174. for f in document['formitaeten']:
  175. self._extract_format(video_id, formats, format_urls, f)
  176. self._sort_formats(formats)
  177. thumbnails = []
  178. teaser_bild = document.get('teaserBild')
  179. if isinstance(teaser_bild, dict):
  180. for thumbnail_key, thumbnail in teaser_bild.items():
  181. thumbnail_url = try_get(
  182. thumbnail, lambda x: x['url'], compat_str)
  183. if thumbnail_url:
  184. thumbnails.append({
  185. 'url': thumbnail_url,
  186. 'id': thumbnail_key,
  187. 'width': int_or_none(thumbnail.get('width')),
  188. 'height': int_or_none(thumbnail.get('height')),
  189. })
  190. return {
  191. 'id': video_id,
  192. 'title': title,
  193. 'description': document.get('beschreibung'),
  194. 'duration': int_or_none(document.get('length')),
  195. 'timestamp': unified_timestamp(try_get(
  196. document, lambda x: x['meta']['editorialDate'], compat_str)),
  197. 'thumbnails': thumbnails,
  198. 'subtitles': self._extract_subtitles(document),
  199. 'formats': formats,
  200. }
  201. def _real_extract(self, url):
  202. video_id = self._match_id(url)
  203. webpage = self._download_webpage(url, video_id, fatal=False)
  204. if webpage:
  205. player = self._extract_player(webpage, url, fatal=False)
  206. if player:
  207. return self._extract_regular(url, player, video_id)
  208. return self._extract_mobile(video_id)
  209. class ZDFChannelIE(ZDFBaseIE):
  210. _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  211. _TESTS = [{
  212. 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
  213. 'info_dict': {
  214. 'id': 'das-aktuelle-sportstudio',
  215. 'title': 'das aktuelle sportstudio | ZDF',
  216. },
  217. 'playlist_count': 21,
  218. }, {
  219. 'url': 'https://www.zdf.de/dokumentation/planet-e',
  220. 'info_dict': {
  221. 'id': 'planet-e',
  222. 'title': 'planet e.',
  223. },
  224. 'playlist_count': 4,
  225. }, {
  226. 'url': 'https://www.zdf.de/filme/taunuskrimi/',
  227. 'only_matching': True,
  228. }]
  229. @classmethod
  230. def suitable(cls, url):
  231. return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
  232. def _real_extract(self, url):
  233. channel_id = self._match_id(url)
  234. webpage = self._download_webpage(url, channel_id)
  235. entries = [
  236. self.url_result(item_url, ie=ZDFIE.ie_key())
  237. for item_url in orderedSet(re.findall(
  238. r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
  239. return self.playlist_result(
  240. entries, channel_id, self._og_search_title(webpage, fatal=False))
  241. r"""
  242. player = self._extract_player(webpage, channel_id)
  243. channel_id = self._search_regex(
  244. r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
  245. 'channel id', group='id')
  246. channel = self._call_api(
  247. 'https://api.zdf.de/content/documents/%s.json' % channel_id,
  248. player, url, channel_id)
  249. items = []
  250. for module in channel['module']:
  251. for teaser in try_get(module, lambda x: x['teaser'], list) or []:
  252. t = try_get(
  253. teaser, lambda x: x['http://zdf.de/rels/target'], dict)
  254. if not t:
  255. continue
  256. items.extend(try_get(
  257. t,
  258. lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
  259. list) or [])
  260. items.extend(try_get(
  261. module,
  262. lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
  263. list) or [])
  264. entries = []
  265. entry_urls = set()
  266. for item in items:
  267. t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
  268. if not t:
  269. continue
  270. sharing_url = t.get('http://zdf.de/rels/sharing-url')
  271. if not sharing_url or not isinstance(sharing_url, compat_str):
  272. continue
  273. if sharing_url in entry_urls:
  274. continue
  275. entry_urls.add(sharing_url)
  276. entries.append(self.url_result(
  277. sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
  278. return self.playlist_result(entries, channel_id, channel.get('title'))
  279. """