You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

296 lines
10 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. parse_iso8601,
  10. parse_duration,
  11. try_get,
  12. update_url_query,
  13. )
  class TVNowBaseIE(InfoExtractor):
      """Shared helpers for the TVNOW extractors: API access and video parsing."""

      # Fields requested from the API for a single video object.
      _VIDEO_FIELDS = (
          'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
          'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
          'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
          'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

      def _call_api(self, path, video_id, query):
          """Download and return the JSON found at ``path`` below the v3 API root.

          ``video_id`` is forwarded to ``_download_json`` and ``query`` is sent
          as the URL query.
          """
          return self._download_json(
              'https://api.tvnow.de/v3/' + path,
              video_id, query=query)

      def _extract_video(self, info, display_id):
          """Build the result dict for one video from its API object ``info``.

          Each distinct manifest path listed under ``info['manifest']`` is
          tried in turn (DASH, smooth streaming and HLS variants of the same
          URL) until one of them yields formats.

          Raises ExtractorError when no manifest produced formats and the video
          is flagged as DRM protected or as not free; geo-blocked videos are
          reported through ``raise_geo_restricted``.
          """
          video_id = compat_str(info['id'])
          title = info['title']

          # Initialised up front so that a missing or empty ``manifest`` mapping
          # cannot leave ``formats`` unbound when it is used after the loop.
          formats = []
          paths = []
          for manifest_url in (info.get('manifest') or {}).values():
              if not manifest_url:
                  continue
              manifest_url = update_url_query(manifest_url, {'filter': ''})
              path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
              # Several manifest entries may point at the same stream; probe
              # every path only once.
              if path in paths:
                  continue
              paths.append(path)

              def url_repl(proto, suffix):
                  # Rewrite the current manifest URL for another protocol:
                  # replace the file name after ".ism/" with ``suffix`` and the
                  # hls/dash/hss token with ``proto``.
                  return re.sub(
                      r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                          r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                          '.ism/' + suffix, manifest_url))

              formats = self._extract_mpd_formats(
                  url_repl('dash', '.mpd'), video_id,
                  mpd_id='dash', fatal=False)
              formats.extend(self._extract_ism_formats(
                  url_repl('hss', 'Manifest'),
                  video_id, ism_id='mss', fatal=False))
              formats.extend(self._extract_m3u8_formats(
                  url_repl('hls', '.m3u8'), video_id, 'mp4',
                  'm3u8_native', m3u8_id='hls', fatal=False))
              if formats:
                  break
          else:
              # Reached only when no manifest yielded formats: report the most
              # specific reason the API gives us.
              if info.get('isDrm'):
                  raise ExtractorError(
                      'Video %s is DRM protected' % video_id, expected=True)
              if info.get('geoblocked'):
                  # The helper raises by itself; no extra ``raise`` is needed.
                  self.raise_geo_restricted()
              if not info.get('free', True):
                  raise ExtractorError(
                      'Video %s is not available for free' % video_id, expected=True)

          self._sort_formats(formats)

          description = info.get('articleLong') or info.get('articleShort')
          # ' ' is passed as the second argument of parse_iso8601
          # (presumably the date/time delimiter -- confirm against utils).
          timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
          duration = parse_duration(info.get('duration'))

          # ``format`` may be absent or null in the API answer.
          f = info.get('format') or {}

          thumbnails = [{
              'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
          }]
          thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
          if thumbnail:
              thumbnails.append({
                  'url': thumbnail,
              })

          return {
              'id': video_id,
              'display_id': display_id,
              'title': title,
              'description': description,
              'thumbnails': thumbnails,
              'timestamp': timestamp,
              'duration': duration,
              'series': f.get('title'),
              'season_number': int_or_none(info.get('season')),
              'episode_number': int_or_none(info.get('episode')),
              'episode': title,
              'formats': formats,
          }
  class TVNowIE(TVNowBaseIE):
      """Extractor for a single TVNOW episode page."""

      # The negative lookahead keeps "list" / "jahr" URLs out of this pattern.
      _VALID_URL = r'''(?x)
                      https?://
                          (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                          (?P<show_id>[^/]+)/
                          (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                      '''

      _TESTS = [{
          'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
          'info_dict': {
              'id': '331082',
              'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
              'ext': 'mp4',
              'title': 'Der neue Porsche 911 GT 3',
              'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
              'thumbnail': r're:^https?://.*\.jpg$',
              'timestamp': 1495994400,
              'upload_date': '20170528',
              'duration': 5283,
              'series': 'GRIP - Das Motormagazin',
              'season_number': 14,
              'episode_number': 405,
              'episode': 'Der neue Porsche 911 GT 3',
          },
      }, {
          # rtl2
          'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
          'only_matching': True,
      }, {
          # rtlnitro
          'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
          'only_matching': True,
      }, {
          # superrtl
          'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
          'only_matching': True,
      }, {
          # ntv
          'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
          'only_matching': True,
      }, {
          # vox
          'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
          'only_matching': True,
      }, {
          # rtlplus
          'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
          'only_matching': True,
      }, {
          'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Fetch the API object for the episode in ``url`` and convert it."""
          match = re.match(self._VALID_URL, url)
          station, show_id, episode_id = match.group('station', 'show_id', 'id')
          # The API addresses an episode as "<show>/<episode>".
          display_id = '%s/%s' % (show_id, episode_id)
          query = {
              'fields': ','.join(self._VIDEO_FIELDS),
              'station': station,
          }
          info = self._call_api('movies/' + display_id, display_id, query=query)
          return self._extract_video(info, display_id)
  class TVNowListBaseIE(TVNowBaseIE):
      """Base for the playlist extractors; subclasses supply the field tuples."""

      # Matches the show page URL; ``base_url`` captures everything up to and
      # including the show id.
      _SHOW_VALID_URL = r'''(?x)
                      (?P<base_url>
                          https?://
                              (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                              (?P<show_id>[^/]+)
                      )
                      '''

      def _extract_list_info(self, display_id, show_id):
          """Query the "formats/seo" endpoint for ``show_id`` and return its JSON.

          The requested fields are the show fields, the season fields prefixed
          with "formatTabs." and the video fields prefixed with the path of the
          nested movie container.
          """
          season_fields = [
              'formatTabs.%s' % name for name in self._SEASON_FIELDS]
          movie_fields = [
              'formatTabs.formatTabPages.container.movies.%s' % name
              for name in self._VIDEO_FIELDS]
          requested = list(self._SHOW_FIELDS) + season_fields + movie_fields
          query = {
              'fields': ','.join(requested),
              'name': show_id + '.php'
          }
          return self._call_api('formats/seo', display_id, query=query)
  class TVNowListIE(TVNowListBaseIE):
      """Extractor for one season / year listing of a TVNOW show."""

      _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL

      _SHOW_FIELDS = ('title', )
      _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
      _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

      _TESTS = [{
          'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
          'info_dict': {
              'id': '28296',
              'title': '30 Minuten Deutschland - Aktuell',
          },
          'playlist_mincount': 1,
      }, {
          'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
          'only_matching': True,
      }, {
          'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
          'only_matching': True,
      }]

      @classmethod
      def suitable(cls, url):
          """Return False for any URL that TVNowIE also matches."""
          return (False if TVNowIE.suitable(url)
                  else super(TVNowListIE, cls).suitable(url))

      def _real_extract(self, url):
          """Return a playlist with one URL entry per video of the season.

          Raises ExtractorError when the show has no tab whose ``seoheadline``
          equals the season id taken from ``url``.
          """
          base_url, show_id, season_id = re.match(
              self._VALID_URL, url).group('base_url', 'show_id', 'id')

          list_info = self._extract_list_info(season_id, show_id)

          # A default is passed to next() so that a missing season surfaces as
          # an extractor error instead of a bare StopIteration.
          season = next(
              (tab for tab in list_info['formatTabs']['items']
               if tab.get('seoheadline') == season_id), None)
          if season is None:
              raise ExtractorError(
                  'Unable to find season %s of show %s' % (season_id, show_id))

          # Prefer "<show> - <season>", falling back to whichever part exists.
          title = list_info.get('title')
          headline = season.get('headline')
          if title and headline:
              title = '%s - %s' % (title, headline)
          else:
              title = headline or title

          entries = []
          for container in season['formatTabPages']['items']:
              items = try_get(
                  container, lambda x: x['container']['movies']['items'],
                  list) or []
              for info in items:
                  seo_url = info.get('seoUrl')
                  if not seo_url:
                      continue
                  video_id = info.get('id')
                  entries.append(self.url_result(
                      '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
                      compat_str(video_id) if video_id else None))

          return self.playlist_result(
              entries, compat_str(season.get('id') or season_id), title)
  class TVNowShowIE(TVNowListBaseIE):
      """Extractor for a whole TVNOW show: a playlist of its season listings."""

      _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL

      _SHOW_FIELDS = ('id', 'title', )
      _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
      # Empty, so no per-video fields are added to the API request.
      _VIDEO_FIELDS = ()

      _TESTS = [{
          'url': 'https://www.tvnow.at/vox/ab-ins-beet',
          'info_dict': {
              'id': 'ab-ins-beet',
              'title': 'Ab ins Beet!',
          },
          'playlist_mincount': 7,
      }, {
          'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
          'only_matching': True,
      }, {
          'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
          'only_matching': True,
      }]

      @classmethod
      def suitable(cls, url):
          """Return False for URLs already claimed by TVNowIE or TVNowListIE."""
          if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
              return False
          return super(TVNowShowIE, cls).suitable(url)

      def _real_extract(self, url):
          """Return a playlist with one TVNowListIE URL entry per season tab."""
          match = re.match(self._VALID_URL, url)
          base_url, show_id = match.group('base_url', 'show_id')

          list_info = self._extract_list_info(show_id, show_id)

          entries = []
          for tab in list_info['formatTabs']['items']:
              slug = tab.get('seoheadline')
              # Tabs without a slug cannot be turned into a listing URL.
              if not slug:
                  continue
              tab_id = tab.get('id')
              entry_id = compat_str(tab_id) if tab_id else None
              entries.append(self.url_result(
                  '%s/list/%s' % (base_url, slug), TVNowListIE.ie_key(),
                  entry_id, tab.get('headline')))

          return self.playlist_result(entries, show_id, list_info.get('title'))