You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

486 lines
18 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. parse_iso8601,
  10. parse_duration,
  11. str_or_none,
  12. update_url_query,
  13. urljoin,
  14. )
  15. class TVNowBaseIE(InfoExtractor):
  16. _VIDEO_FIELDS = (
  17. 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
  18. 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
  19. 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
  20. 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
  21. def _call_api(self, path, video_id, query):
  22. return self._download_json(
  23. 'https://api.tvnow.de/v3/' + path, video_id, query=query)
  24. def _extract_video(self, info, display_id):
  25. video_id = compat_str(info['id'])
  26. title = info['title']
  27. paths = []
  28. for manifest_url in (info.get('manifest') or {}).values():
  29. if not manifest_url:
  30. continue
  31. manifest_url = update_url_query(manifest_url, {'filter': ''})
  32. path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
  33. if path in paths:
  34. continue
  35. paths.append(path)
  36. def url_repl(proto, suffix):
  37. return re.sub(
  38. r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
  39. r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
  40. '.ism/' + suffix, manifest_url))
  41. def make_urls(proto, suffix):
  42. urls = [url_repl(proto, suffix)]
  43. hd_url = urls[0].replace('/manifest/', '/ngvod/')
  44. if hd_url != urls[0]:
  45. urls.append(hd_url)
  46. return urls
  47. for man_url in make_urls('dash', '.mpd'):
  48. formats = self._extract_mpd_formats(
  49. man_url, video_id, mpd_id='dash', fatal=False)
  50. for man_url in make_urls('hss', 'Manifest'):
  51. formats.extend(self._extract_ism_formats(
  52. man_url, video_id, ism_id='mss', fatal=False))
  53. for man_url in make_urls('hls', '.m3u8'):
  54. formats.extend(self._extract_m3u8_formats(
  55. man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
  56. fatal=False))
  57. if formats:
  58. break
  59. else:
  60. if info.get('isDrm'):
  61. raise ExtractorError(
  62. 'Video %s is DRM protected' % video_id, expected=True)
  63. if info.get('geoblocked'):
  64. raise self.raise_geo_restricted()
  65. if not info.get('free', True):
  66. raise ExtractorError(
  67. 'Video %s is not available for free' % video_id, expected=True)
  68. self._sort_formats(formats)
  69. description = info.get('articleLong') or info.get('articleShort')
  70. timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
  71. duration = parse_duration(info.get('duration'))
  72. f = info.get('format', {})
  73. thumbnails = [{
  74. 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
  75. }]
  76. thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
  77. if thumbnail:
  78. thumbnails.append({
  79. 'url': thumbnail,
  80. })
  81. return {
  82. 'id': video_id,
  83. 'display_id': display_id,
  84. 'title': title,
  85. 'description': description,
  86. 'thumbnails': thumbnails,
  87. 'timestamp': timestamp,
  88. 'duration': duration,
  89. 'series': f.get('title'),
  90. 'season_number': int_or_none(info.get('season')),
  91. 'episode_number': int_or_none(info.get('episode')),
  92. 'episode': title,
  93. 'formats': formats,
  94. }
  95. class TVNowIE(TVNowBaseIE):
  96. _VALID_URL = r'''(?x)
  97. https?://
  98. (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
  99. (?P<show_id>[^/]+)/
  100. (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
  101. '''
  102. @classmethod
  103. def suitable(cls, url):
  104. return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
  105. else super(TVNowIE, cls).suitable(url))
  106. _TESTS = [{
  107. 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
  108. 'info_dict': {
  109. 'id': '331082',
  110. 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
  111. 'ext': 'mp4',
  112. 'title': 'Der neue Porsche 911 GT 3',
  113. 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
  114. 'timestamp': 1495994400,
  115. 'upload_date': '20170528',
  116. 'duration': 5283,
  117. 'series': 'GRIP - Das Motormagazin',
  118. 'season_number': 14,
  119. 'episode_number': 405,
  120. 'episode': 'Der neue Porsche 911 GT 3',
  121. },
  122. }, {
  123. # rtl2
  124. 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
  125. 'only_matching': True,
  126. }, {
  127. # rtlnitro
  128. 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
  129. 'only_matching': True,
  130. }, {
  131. # superrtl
  132. 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
  133. 'only_matching': True,
  134. }, {
  135. # ntv
  136. 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
  137. 'only_matching': True,
  138. }, {
  139. # vox
  140. 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
  141. 'only_matching': True,
  142. }, {
  143. # rtlplus
  144. 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
  145. 'only_matching': True,
  146. }, {
  147. 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
  148. 'only_matching': True,
  149. }]
  150. def _real_extract(self, url):
  151. mobj = re.match(self._VALID_URL, url)
  152. display_id = '%s/%s' % mobj.group(2, 3)
  153. info = self._call_api(
  154. 'movies/' + display_id, display_id, query={
  155. 'fields': ','.join(self._VIDEO_FIELDS),
  156. })
  157. return self._extract_video(info, display_id)
  158. class TVNowNewIE(InfoExtractor):
  159. _VALID_URL = r'''(?x)
  160. (?P<base_url>https?://
  161. (?:www\.)?tvnow\.(?:de|at|ch)/
  162. (?:shows|serien))/
  163. (?P<show>[^/]+)-\d+/
  164. [^/]+/
  165. episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
  166. '''
  167. _TESTS = [{
  168. 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
  169. 'only_matching': True,
  170. }]
  171. def _real_extract(self, url):
  172. mobj = re.match(self._VALID_URL, url)
  173. base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
  174. show, episode = mobj.group('show', 'episode')
  175. return self.url_result(
  176. # Rewrite new URLs to the old format and use extraction via old API
  177. # at api.tvnow.de as a loophole for bypassing premium content checks
  178. '%s/%s/%s' % (base_url, show, episode),
  179. ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
  180. class TVNowNewBaseIE(InfoExtractor):
  181. def _call_api(self, path, video_id, query={}):
  182. result = self._download_json(
  183. 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
  184. error = result.get('error')
  185. if error:
  186. raise ExtractorError(
  187. '%s said: %s' % (self.IE_NAME, error), expected=True)
  188. return result
  189. r"""
  190. TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
  191. when api.tvnow.de is shut down. This version can't bypass premium checks though.
  192. class TVNowIE(TVNowNewBaseIE):
  193. _VALID_URL = r'''(?x)
  194. https?://
  195. (?:www\.)?tvnow\.(?:de|at|ch)/
  196. (?:shows|serien)/[^/]+/
  197. (?:[^/]+/)+
  198. (?P<display_id>[^/?$&]+)-(?P<id>\d+)
  199. '''
  200. _TESTS = [{
  201. # episode with annual navigation
  202. 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
  203. 'info_dict': {
  204. 'id': '331082',
  205. 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
  206. 'ext': 'mp4',
  207. 'title': 'Der neue Porsche 911 GT 3',
  208. 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
  209. 'thumbnail': r're:^https?://.*\.jpg$',
  210. 'timestamp': 1495994400,
  211. 'upload_date': '20170528',
  212. 'duration': 5283,
  213. 'series': 'GRIP - Das Motormagazin',
  214. 'season_number': 14,
  215. 'episode_number': 405,
  216. 'episode': 'Der neue Porsche 911 GT 3',
  217. },
  218. }, {
  219. # rtl2, episode with season navigation
  220. 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
  221. 'only_matching': True,
  222. }, {
  223. # rtlnitro
  224. 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
  225. 'only_matching': True,
  226. }, {
  227. # superrtl
  228. 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
  229. 'only_matching': True,
  230. }, {
  231. # ntv
  232. 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
  233. 'only_matching': True,
  234. }, {
  235. # vox
  236. 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
  237. 'only_matching': True,
  238. }, {
  239. 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
  240. 'only_matching': True,
  241. }]
  242. def _extract_video(self, info, url, display_id):
  243. config = info['config']
  244. source = config['source']
  245. video_id = compat_str(info.get('id') or source['videoId'])
  246. title = source['title'].strip()
  247. paths = []
  248. for manifest_url in (info.get('manifest') or {}).values():
  249. if not manifest_url:
  250. continue
  251. manifest_url = update_url_query(manifest_url, {'filter': ''})
  252. path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
  253. if path in paths:
  254. continue
  255. paths.append(path)
  256. def url_repl(proto, suffix):
  257. return re.sub(
  258. r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
  259. r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
  260. '.ism/' + suffix, manifest_url))
  261. formats = self._extract_mpd_formats(
  262. url_repl('dash', '.mpd'), video_id,
  263. mpd_id='dash', fatal=False)
  264. formats.extend(self._extract_ism_formats(
  265. url_repl('hss', 'Manifest'),
  266. video_id, ism_id='mss', fatal=False))
  267. formats.extend(self._extract_m3u8_formats(
  268. url_repl('hls', '.m3u8'), video_id, 'mp4',
  269. 'm3u8_native', m3u8_id='hls', fatal=False))
  270. if formats:
  271. break
  272. else:
  273. if try_get(info, lambda x: x['rights']['isDrm']):
  274. raise ExtractorError(
  275. 'Video %s is DRM protected' % video_id, expected=True)
  276. if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
  277. raise self.raise_geo_restricted()
  278. if not info.get('free', True):
  279. raise ExtractorError(
  280. 'Video %s is not available for free' % video_id, expected=True)
  281. self._sort_formats(formats)
  282. description = source.get('description')
  283. thumbnail = url_or_none(source.get('poster'))
  284. timestamp = unified_timestamp(source.get('previewStart'))
  285. duration = parse_duration(source.get('length'))
  286. series = source.get('format')
  287. season_number = int_or_none(self._search_regex(
  288. r'staffel-(\d+)', url, 'season number', default=None))
  289. episode_number = int_or_none(self._search_regex(
  290. r'episode-(\d+)', url, 'episode number', default=None))
  291. return {
  292. 'id': video_id,
  293. 'display_id': display_id,
  294. 'title': title,
  295. 'description': description,
  296. 'thumbnail': thumbnail,
  297. 'timestamp': timestamp,
  298. 'duration': duration,
  299. 'series': series,
  300. 'season_number': season_number,
  301. 'episode_number': episode_number,
  302. 'episode': title,
  303. 'formats': formats,
  304. }
  305. def _real_extract(self, url):
  306. display_id, video_id = re.match(self._VALID_URL, url).groups()
  307. info = self._call_api('player/' + video_id, video_id)
  308. return self._extract_video(info, video_id, display_id)
  309. """
  310. class TVNowListBaseIE(TVNowNewBaseIE):
  311. _SHOW_VALID_URL = r'''(?x)
  312. (?P<base_url>
  313. https?://
  314. (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
  315. [^/?#&]+-(?P<show_id>\d+)
  316. )
  317. '''
  318. @classmethod
  319. def suitable(cls, url):
  320. return (False if TVNowNewIE.suitable(url)
  321. else super(TVNowListBaseIE, cls).suitable(url))
  322. def _extract_items(self, url, show_id, list_id, query):
  323. items = self._call_api(
  324. 'teaserrow/format/episode/' + show_id, list_id,
  325. query=query)['items']
  326. entries = []
  327. for item in items:
  328. if not isinstance(item, dict):
  329. continue
  330. item_url = urljoin(url, item.get('url'))
  331. if not item_url:
  332. continue
  333. video_id = str_or_none(item.get('id') or item.get('videoId'))
  334. item_title = item.get('subheadline') or item.get('text')
  335. entries.append(self.url_result(
  336. item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
  337. video_title=item_title))
  338. return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
  339. class TVNowSeasonIE(TVNowListBaseIE):
  340. _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
  341. _TESTS = [{
  342. 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
  343. 'info_dict': {
  344. 'id': '1815/13',
  345. },
  346. 'playlist_mincount': 22,
  347. }]
  348. def _real_extract(self, url):
  349. _, show_id, season_id = re.match(self._VALID_URL, url).groups()
  350. return self._extract_items(
  351. url, show_id, season_id, {'season': season_id})
  352. class TVNowAnnualIE(TVNowListBaseIE):
  353. _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
  354. _TESTS = [{
  355. 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
  356. 'info_dict': {
  357. 'id': '1669/2017-05',
  358. },
  359. 'playlist_mincount': 2,
  360. }]
  361. def _real_extract(self, url):
  362. _, show_id, year, month = re.match(self._VALID_URL, url).groups()
  363. return self._extract_items(
  364. url, show_id, '%s-%s' % (year, month), {
  365. 'year': int(year),
  366. 'month': int(month),
  367. })
  368. class TVNowShowIE(TVNowListBaseIE):
  369. _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
  370. _TESTS = [{
  371. # annual navigationType
  372. 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
  373. 'info_dict': {
  374. 'id': '1669',
  375. },
  376. 'playlist_mincount': 73,
  377. }, {
  378. # season navigationType
  379. 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
  380. 'info_dict': {
  381. 'id': '11471',
  382. },
  383. 'playlist_mincount': 3,
  384. }]
  385. @classmethod
  386. def suitable(cls, url):
  387. return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
  388. else super(TVNowShowIE, cls).suitable(url))
  389. def _real_extract(self, url):
  390. base_url, show_id = re.match(self._VALID_URL, url).groups()
  391. result = self._call_api(
  392. 'teaserrow/format/navigation/' + show_id, show_id)
  393. items = result['items']
  394. entries = []
  395. navigation = result.get('navigationType')
  396. if navigation == 'annual':
  397. for item in items:
  398. if not isinstance(item, dict):
  399. continue
  400. year = int_or_none(item.get('year'))
  401. if year is None:
  402. continue
  403. months = item.get('months')
  404. if not isinstance(months, list):
  405. continue
  406. for month_dict in months:
  407. if not isinstance(month_dict, dict) or not month_dict:
  408. continue
  409. month_number = int_or_none(list(month_dict.keys())[0])
  410. if month_number is None:
  411. continue
  412. entries.append(self.url_result(
  413. '%s/%04d-%02d' % (base_url, year, month_number),
  414. ie=TVNowAnnualIE.ie_key()))
  415. elif navigation == 'season':
  416. for item in items:
  417. if not isinstance(item, dict):
  418. continue
  419. season_number = int_or_none(item.get('season'))
  420. if season_number is None:
  421. continue
  422. entries.append(self.url_result(
  423. '%s/staffel-%d' % (base_url, season_number),
  424. ie=TVNowSeasonIE.ie_key()))
  425. else:
  426. raise ExtractorError('Unknown navigationType')
  427. return self.playlist_result(entries, show_id)