You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

311 lines
12 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. determine_ext,
  8. ExtractorError,
  9. int_or_none,
  10. parse_duration,
  11. parse_iso8601,
  12. xpath_element,
  13. xpath_text,
  14. )
  15. class BRIE(InfoExtractor):
  16. IE_DESC = 'Bayerischer Rundfunk'
  17. _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
  18. _TESTS = [
  19. {
  20. 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
  21. 'md5': '83a0477cf0b8451027eb566d88b51106',
  22. 'info_dict': {
  23. 'id': '48f656ef-287e-486f-be86-459122db22cc',
  24. 'ext': 'mp4',
  25. 'title': 'Die böse Überraschung',
  26. 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
  27. 'duration': 180,
  28. 'uploader': 'Reinhard Weber',
  29. 'upload_date': '20150422',
  30. },
  31. 'skip': '404 not found',
  32. },
  33. {
  34. 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
  35. 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
  36. 'info_dict': {
  37. 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
  38. 'ext': 'flv',
  39. 'title': 'Manfred Schreiber ist tot',
  40. 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
  41. 'duration': 26,
  42. },
  43. 'skip': '404 not found',
  44. },
  45. {
  46. 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
  47. 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
  48. 'info_dict': {
  49. 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
  50. 'ext': 'aac',
  51. 'title': 'Kurzweilig und sehr bewegend',
  52. 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
  53. 'duration': 296,
  54. },
  55. 'skip': '404 not found',
  56. },
  57. {
  58. 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
  59. 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
  60. 'info_dict': {
  61. 'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
  62. 'ext': 'mp4',
  63. 'title': 'Umweltbewusster Häuslebauer',
  64. 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
  65. 'duration': 116,
  66. }
  67. },
  68. {
  69. 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
  70. 'md5': '23bca295f1650d698f94fc570977dae3',
  71. 'info_dict': {
  72. 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
  73. 'ext': 'mp4',
  74. 'title': 'Folge 1 - Metaphysik',
  75. 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
  76. 'duration': 893,
  77. 'uploader': 'Eva Maria Steimle',
  78. 'upload_date': '20170208',
  79. }
  80. },
  81. ]
  82. def _real_extract(self, url):
  83. base_url, display_id = re.search(self._VALID_URL, url).groups()
  84. page = self._download_webpage(url, display_id)
  85. xml_url = self._search_regex(
  86. r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
  87. xml = self._download_xml(base_url + xml_url, display_id)
  88. medias = []
  89. for xml_media in xml.findall('video') + xml.findall('audio'):
  90. media_id = xml_media.get('externalId')
  91. media = {
  92. 'id': media_id,
  93. 'title': xpath_text(xml_media, 'title', 'title', True),
  94. 'duration': parse_duration(xpath_text(xml_media, 'duration')),
  95. 'formats': self._extract_formats(xpath_element(
  96. xml_media, 'assets'), media_id),
  97. 'thumbnails': self._extract_thumbnails(xpath_element(
  98. xml_media, 'teaserImage/variants'), base_url),
  99. 'description': xpath_text(xml_media, 'desc'),
  100. 'webpage_url': xpath_text(xml_media, 'permalink'),
  101. 'uploader': xpath_text(xml_media, 'author'),
  102. }
  103. broadcast_date = xpath_text(xml_media, 'broadcastDate')
  104. if broadcast_date:
  105. media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
  106. medias.append(media)
  107. if len(medias) > 1:
  108. self._downloader.report_warning(
  109. 'found multiple medias; please '
  110. 'report this with the video URL to http://yt-dl.org/bug')
  111. if not medias:
  112. raise ExtractorError('No media entries found')
  113. return medias[0]
  114. def _extract_formats(self, assets, media_id):
  115. formats = []
  116. for asset in assets.findall('asset'):
  117. format_url = xpath_text(asset, ['downloadUrl', 'url'])
  118. asset_type = asset.get('type')
  119. if asset_type.startswith('HDS'):
  120. formats.extend(self._extract_f4m_formats(
  121. format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
  122. elif asset_type.startswith('HLS'):
  123. formats.extend(self._extract_m3u8_formats(
  124. format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
  125. else:
  126. format_info = {
  127. 'ext': xpath_text(asset, 'mediaType'),
  128. 'width': int_or_none(xpath_text(asset, 'frameWidth')),
  129. 'height': int_or_none(xpath_text(asset, 'frameHeight')),
  130. 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
  131. 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
  132. 'vcodec': xpath_text(asset, 'codecVideo'),
  133. 'acodec': xpath_text(asset, 'codecAudio'),
  134. 'container': xpath_text(asset, 'mediaType'),
  135. 'filesize': int_or_none(xpath_text(asset, 'size')),
  136. }
  137. format_url = self._proto_relative_url(format_url)
  138. if format_url:
  139. http_format_info = format_info.copy()
  140. http_format_info.update({
  141. 'url': format_url,
  142. 'format_id': 'http-%s' % asset_type,
  143. })
  144. formats.append(http_format_info)
  145. server_prefix = xpath_text(asset, 'serverPrefix')
  146. if server_prefix:
  147. rtmp_format_info = format_info.copy()
  148. rtmp_format_info.update({
  149. 'url': server_prefix,
  150. 'play_path': xpath_text(asset, 'fileName'),
  151. 'format_id': 'rtmp-%s' % asset_type,
  152. })
  153. formats.append(rtmp_format_info)
  154. self._sort_formats(formats)
  155. return formats
  156. def _extract_thumbnails(self, variants, base_url):
  157. thumbnails = [{
  158. 'url': base_url + xpath_text(variant, 'url'),
  159. 'width': int_or_none(xpath_text(variant, 'width')),
  160. 'height': int_or_none(xpath_text(variant, 'height')),
  161. } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
  162. thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
  163. return thumbnails
  164. class BRMediathekIE(InfoExtractor):
  165. IE_DESC = 'Bayerischer Rundfunk Mediathek'
  166. _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
  167. _TESTS = [{
  168. 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
  169. 'md5': 'fdc3d485835966d1622587d08ba632ec',
  170. 'info_dict': {
  171. 'id': 'av:5a1e6a6e8fce6d001871cc8e',
  172. 'ext': 'mp4',
  173. 'title': 'Die Sendung vom 28.11.2017',
  174. 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
  175. 'timestamp': 1511942766,
  176. 'upload_date': '20171129',
  177. }
  178. }]
  179. def _real_extract(self, url):
  180. clip_id = self._match_id(url)
  181. clip = self._download_json(
  182. 'https://proxy-base.master.mango.express/graphql',
  183. clip_id, data=json.dumps({
  184. "query": """{
  185. viewer {
  186. clip(id: "%s") {
  187. title
  188. description
  189. duration
  190. createdAt
  191. ageRestriction
  192. videoFiles {
  193. edges {
  194. node {
  195. publicLocation
  196. fileSize
  197. videoProfile {
  198. width
  199. height
  200. bitrate
  201. encoding
  202. }
  203. }
  204. }
  205. }
  206. captionFiles {
  207. edges {
  208. node {
  209. publicLocation
  210. }
  211. }
  212. }
  213. teaserImages {
  214. edges {
  215. node {
  216. imageFiles {
  217. edges {
  218. node {
  219. publicLocation
  220. width
  221. height
  222. }
  223. }
  224. }
  225. }
  226. }
  227. }
  228. }
  229. }
  230. }""" % clip_id}).encode(), headers={
  231. 'Content-Type': 'application/json',
  232. })['data']['viewer']['clip']
  233. title = clip['title']
  234. formats = []
  235. for edge in clip.get('videoFiles', {}).get('edges', []):
  236. node = edge.get('node', {})
  237. n_url = node.get('publicLocation')
  238. if not n_url:
  239. continue
  240. ext = determine_ext(n_url)
  241. if ext == 'm3u8':
  242. formats.extend(self._extract_m3u8_formats(
  243. n_url, clip_id, 'mp4', 'm3u8_native',
  244. m3u8_id='hls', fatal=False))
  245. else:
  246. video_profile = node.get('videoProfile', {})
  247. tbr = int_or_none(video_profile.get('bitrate'))
  248. format_id = 'http'
  249. if tbr:
  250. format_id += '-%d' % tbr
  251. formats.append({
  252. 'format_id': format_id,
  253. 'url': n_url,
  254. 'width': int_or_none(video_profile.get('width')),
  255. 'height': int_or_none(video_profile.get('height')),
  256. 'tbr': tbr,
  257. 'filesize': int_or_none(node.get('fileSize')),
  258. })
  259. self._sort_formats(formats)
  260. subtitles = {}
  261. for edge in clip.get('captionFiles', {}).get('edges', []):
  262. node = edge.get('node', {})
  263. n_url = node.get('publicLocation')
  264. if not n_url:
  265. continue
  266. subtitles.setdefault('de', []).append({
  267. 'url': n_url,
  268. })
  269. thumbnails = []
  270. for edge in clip.get('teaserImages', {}).get('edges', []):
  271. for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
  272. node = image_edge.get('node', {})
  273. n_url = node.get('publicLocation')
  274. if not n_url:
  275. continue
  276. thumbnails.append({
  277. 'url': n_url,
  278. 'width': int_or_none(node.get('width')),
  279. 'height': int_or_none(node.get('height')),
  280. })
  281. return {
  282. 'id': clip_id,
  283. 'title': title,
  284. 'description': clip.get('description'),
  285. 'duration': int_or_none(clip.get('duration')),
  286. 'timestamp': parse_iso8601(clip.get('createdAt')),
  287. 'age_limit': int_or_none(clip.get('ageRestriction')),
  288. 'formats': formats,
  289. 'subtitles': subtitles,
  290. 'thumbnails': thumbnails,
  291. }