You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

717 lines
25 KiB

8 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_urllib_parse_unquote,
  8. )
  9. from ..utils import (
  10. ExtractorError,
  11. int_or_none,
  12. JSON_LD_RE,
  13. js_to_json,
  14. NO_DEFAULT,
  15. parse_age_limit,
  16. parse_duration,
  17. try_get,
  18. )
  19. class NRKBaseIE(InfoExtractor):
  20. _GEO_COUNTRIES = ['NO']
  21. _api_host = None
  22. def _real_extract(self, url):
  23. video_id = self._match_id(url)
  24. api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
  25. for api_host in api_hosts:
  26. data = self._download_json(
  27. 'http://%s/mediaelement/%s' % (api_host, video_id),
  28. video_id, 'Downloading mediaelement JSON',
  29. fatal=api_host == api_hosts[-1])
  30. if not data:
  31. continue
  32. self._api_host = api_host
  33. break
  34. title = data.get('fullTitle') or data.get('mainTitle') or data['title']
  35. video_id = data.get('id') or video_id
  36. entries = []
  37. conviva = data.get('convivaStatistics') or {}
  38. live = (data.get('mediaElementType') == 'Live'
  39. or data.get('isLive') is True or conviva.get('isLive'))
  40. def make_title(t):
  41. return self._live_title(t) if live else t
  42. media_assets = data.get('mediaAssets')
  43. if media_assets and isinstance(media_assets, list):
  44. def video_id_and_title(idx):
  45. return ((video_id, title) if len(media_assets) == 1
  46. else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
  47. for num, asset in enumerate(media_assets, 1):
  48. asset_url = asset.get('url')
  49. if not asset_url:
  50. continue
  51. formats = self._extract_akamai_formats(asset_url, video_id)
  52. if not formats:
  53. continue
  54. self._sort_formats(formats)
  55. # Some f4m streams may not work with hdcore in fragments' URLs
  56. for f in formats:
  57. extra_param = f.get('extra_param_to_segment_url')
  58. if extra_param and 'hdcore' in extra_param:
  59. del f['extra_param_to_segment_url']
  60. entry_id, entry_title = video_id_and_title(num)
  61. duration = parse_duration(asset.get('duration'))
  62. subtitles = {}
  63. for subtitle in ('webVtt', 'timedText'):
  64. subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
  65. if subtitle_url:
  66. subtitles.setdefault('no', []).append({
  67. 'url': compat_urllib_parse_unquote(subtitle_url)
  68. })
  69. entries.append({
  70. 'id': asset.get('carrierId') or entry_id,
  71. 'title': make_title(entry_title),
  72. 'duration': duration,
  73. 'subtitles': subtitles,
  74. 'formats': formats,
  75. })
  76. if not entries:
  77. media_url = data.get('mediaUrl')
  78. if media_url:
  79. formats = self._extract_akamai_formats(media_url, video_id)
  80. self._sort_formats(formats)
  81. duration = parse_duration(data.get('duration'))
  82. entries = [{
  83. 'id': video_id,
  84. 'title': make_title(title),
  85. 'duration': duration,
  86. 'formats': formats,
  87. }]
  88. if not entries:
  89. MESSAGES = {
  90. 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
  91. 'ProgramRightsHasExpired': 'Programmet har gått ut',
  92. 'NoProgramRights': 'Ikke tilgjengelig',
  93. 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
  94. }
  95. message_type = data.get('messageType', '')
  96. # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
  97. if 'IsGeoBlocked' in message_type:
  98. self.raise_geo_restricted(
  99. msg=MESSAGES.get('ProgramIsGeoBlocked'),
  100. countries=self._GEO_COUNTRIES)
  101. raise ExtractorError(
  102. '%s said: %s' % (self.IE_NAME, MESSAGES.get(
  103. message_type, message_type)),
  104. expected=True)
  105. series = conviva.get('seriesName') or data.get('seriesTitle')
  106. episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
  107. season_number = None
  108. episode_number = None
  109. if data.get('mediaElementType') == 'Episode':
  110. _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
  111. data.get('relativeOriginUrl', '')
  112. EPISODENUM_RE = [
  113. r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
  114. r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
  115. ]
  116. season_number = int_or_none(self._search_regex(
  117. EPISODENUM_RE, _season_episode, 'season number',
  118. default=None, group='season'))
  119. episode_number = int_or_none(self._search_regex(
  120. EPISODENUM_RE, _season_episode, 'episode number',
  121. default=None, group='episode'))
  122. thumbnails = None
  123. images = data.get('images')
  124. if images and isinstance(images, dict):
  125. web_images = images.get('webImages')
  126. if isinstance(web_images, list):
  127. thumbnails = [{
  128. 'url': image['imageUrl'],
  129. 'width': int_or_none(image.get('width')),
  130. 'height': int_or_none(image.get('height')),
  131. } for image in web_images if image.get('imageUrl')]
  132. description = data.get('description')
  133. category = data.get('mediaAnalytics', {}).get('category')
  134. common_info = {
  135. 'description': description,
  136. 'series': series,
  137. 'episode': episode,
  138. 'season_number': season_number,
  139. 'episode_number': episode_number,
  140. 'categories': [category] if category else None,
  141. 'age_limit': parse_age_limit(data.get('legalAge')),
  142. 'thumbnails': thumbnails,
  143. }
  144. vcodec = 'none' if data.get('mediaType') == 'Audio' else None
  145. for entry in entries:
  146. entry.update(common_info)
  147. for f in entry['formats']:
  148. f['vcodec'] = vcodec
  149. points = data.get('shortIndexPoints')
  150. if isinstance(points, list):
  151. chapters = []
  152. for next_num, point in enumerate(points, start=1):
  153. if not isinstance(point, dict):
  154. continue
  155. start_time = parse_duration(point.get('startPoint'))
  156. if start_time is None:
  157. continue
  158. end_time = parse_duration(
  159. data.get('duration')
  160. if next_num == len(points)
  161. else points[next_num].get('startPoint'))
  162. if end_time is None:
  163. continue
  164. chapters.append({
  165. 'start_time': start_time,
  166. 'end_time': end_time,
  167. 'title': point.get('title'),
  168. })
  169. if chapters and len(entries) == 1:
  170. entries[0]['chapters'] = chapters
  171. return self.playlist_result(entries, video_id, title, description)
class NRKIE(NRKBaseIE):
    """Extractor for ``nrk:`` pseudo-URLs, nrk.no video pages and v8 psapi media element URLs."""

    # Verbose regex: accepts the internal ``nrk:`` scheme, nrk.no
    # ``/video/PS*`` pages and v8 psapi ``/mediaelement/`` URLs; the rest
    # of the URL (up to ``?``, ``#`` or ``&``) is captured as the id.
    _VALID_URL = r'''(?x)
                        (?:
                            nrk:|
                            https?://
                                (?:
                                    (?:www\.)?nrk\.no/video/PS\*|
                                    v8[-.]psapi\.nrk\.no/mediaelement/
                                )
                            )
                            (?P<id>[^?#&]+)
                        '''
    # API hosts tried in order by NRKBaseIE._real_extract.
    _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
    _TESTS = [{
        # video
        'url': 'http://www.nrk.no/video/PS*150533',
        'md5': '706f34cdf1322577589e369e522b50ef',
        'info_dict': {
            'id': '150533',
            'ext': 'mp4',
            'title': 'Dompap og andre fugler i Piip-Show',
            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
            'duration': 262,
        }
    }, {
        # audio
        'url': 'http://www.nrk.no/video/PS*154915',
        # MD5 is unstable
        'info_dict': {
            'id': '154915',
            'ext': 'flv',
            'title': 'Slik høres internett ut når du er blind',
            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
            'duration': 20,
        }
    }, {
        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }, {
        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
        'only_matching': True,
    }, {
        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }]
class NRKTVIE(NRKBaseIE):
    """Extractor for programme and episode pages on tv/radio.nrk.no and nrksuper.no."""

    IE_DESC = 'NRK TV and NRK Radio'
    # Programme id: four letters followed by eight digits. Also reused by
    # other extractors in this file.
    _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
    # Verbose regex: /serie/<one or two path segments>/<id> or
    # /program/<id>, optionally followed by a dd-mm-yyyy date and a
    # ``#del=<part>`` fragment. The negative lookahead keeps
    # ``/program/Episodes/...`` listing URLs from matching.
    _VALID_URL = r'''(?x)
                        https?://
                            (?:tv|radio)\.nrk(?:super)?\.no/
                            (?:serie(?:/[^/]+){1,2}|program)/
                            (?![Ee]pisodes)%s
                            (?:/\d{2}-\d{2}-\d{4})?
                            (?:\#del=(?P<part_id>\d+))?
                    ''' % _EPISODE_RE
    # API hosts tried in order by NRKBaseIE._real_extract.
    _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
    _TESTS = [{
        'url': 'https://tv.nrk.no/program/MDDP12000117',
        'md5': '8270824df46ec629b66aeaa5796b36fb',
        'info_dict': {
            'id': 'MDDP12000117AA',
            'ext': 'mp4',
            'title': 'Alarm Trolltunga',
            'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
            'duration': 2223,
            'age_limit': 6,
        },
    }, {
        'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
        'md5': '9a167e54d04671eb6317a37b7bc8a280',
        'info_dict': {
            'id': 'MUHH48000314AA',
            'ext': 'mp4',
            'title': '20 spørsmål 23.05.2014',
            'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
            'duration': 1741,
            'series': '20 spørsmål',
            'episode': '23.05.2014',
        },
        'skip': 'NoProgramRights',
    }, {
        'url': 'https://tv.nrk.no/program/mdfp15000514',
        'info_dict': {
            'id': 'MDFP15000514CA',
            'ext': 'mp4',
            'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
            'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
            'duration': 4605,
            'series': 'Kunnskapskanalen',
            'episode': '24.05.2014',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # single playlist video
        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
        'info_dict': {
            'id': 'MSPO40010515-part2',
            'ext': 'flv',
            'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
            'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Video is geo restricted'],
        'skip': 'particular part is not supported currently',
    }, {
        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
        'playlist': [{
            'info_dict': {
                'id': 'MSPO40010515AH',
                'ext': 'mp4',
                'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                'duration': 772,
                'series': 'Tour de Ski',
                'episode': '06.01.2015',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            'info_dict': {
                'id': 'MSPO40010515BH',
                'ext': 'mp4',
                'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                'duration': 6175,
                'series': 'Tour de Ski',
                'episode': '06.01.2015',
            },
            'params': {
                'skip_download': True,
            },
        }],
        'info_dict': {
            'id': 'MSPO40010515',
            'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
            'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
        },
        'expected_warnings': ['Video is geo restricted'],
    }, {
        'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
        'info_dict': {
            'id': 'KMTE50001317AA',
            'ext': 'mp4',
            'title': 'Anno 13:30',
            'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
            'duration': 2340,
            'series': 'Anno',
            'episode': '13:30',
            'season_number': 3,
            'episode_number': 13,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
        'info_dict': {
            'id': 'MUHH46000317AA',
            'ext': 'mp4',
            'title': 'Nytt på Nytt 27.01.2017',
            'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
            'duration': 1796,
            'series': 'Nytt på nytt',
            'episode': '27.01.2017',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
        'only_matching': True,
    }, {
        'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
        'only_matching': True,
    }]
  353. class NRKTVEpisodeIE(InfoExtractor):
  354. _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
  355. _TESTS = [{
  356. 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
  357. 'info_dict': {
  358. 'id': 'MUHH36005220BA',
  359. 'ext': 'mp4',
  360. 'title': 'Kro, krig og kjærlighet 2:6',
  361. 'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
  362. 'duration': 1563,
  363. 'series': 'Hellums kro',
  364. 'season_number': 1,
  365. 'episode_number': 2,
  366. 'episode': '2:6',
  367. 'age_limit': 6,
  368. },
  369. 'params': {
  370. 'skip_download': True,
  371. },
  372. }, {
  373. 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
  374. 'info_dict': {
  375. 'id': 'MSUI14000816AA',
  376. 'ext': 'mp4',
  377. 'title': 'Backstage 8:30',
  378. 'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
  379. 'duration': 1320,
  380. 'series': 'Backstage',
  381. 'season_number': 1,
  382. 'episode_number': 8,
  383. 'episode': '8:30',
  384. },
  385. 'params': {
  386. 'skip_download': True,
  387. },
  388. 'skip': 'ProgramRightsHasExpired',
  389. }]
  390. def _real_extract(self, url):
  391. display_id = self._match_id(url)
  392. webpage = self._download_webpage(url, display_id)
  393. nrk_id = self._parse_json(
  394. self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
  395. display_id)['@id']
  396. assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
  397. return self.url_result(
  398. 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
  399. class NRKTVSerieBaseIE(InfoExtractor):
  400. def _extract_series(self, webpage, display_id, fatal=True):
  401. config = self._parse_json(
  402. self._search_regex(
  403. (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
  404. r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
  405. webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
  406. display_id, fatal=False, transform_source=js_to_json)
  407. if not config:
  408. return
  409. return try_get(
  410. config,
  411. (lambda x: x['initialState']['series'], lambda x: x['series']),
  412. dict)
  413. def _extract_seasons(self, seasons):
  414. if not isinstance(seasons, list):
  415. return []
  416. entries = []
  417. for season in seasons:
  418. entries.extend(self._extract_episodes(season))
  419. return entries
  420. def _extract_episodes(self, season):
  421. if not isinstance(season, dict):
  422. return []
  423. return self._extract_entries(season.get('episodes'))
  424. def _extract_entries(self, entry_list):
  425. if not isinstance(entry_list, list):
  426. return []
  427. entries = []
  428. for episode in entry_list:
  429. nrk_id = episode.get('prfId')
  430. if not nrk_id or not isinstance(nrk_id, compat_str):
  431. continue
  432. entries.append(self.url_result(
  433. 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
  434. return entries
  435. class NRKTVSeasonIE(NRKTVSerieBaseIE):
  436. _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
  437. _TEST = {
  438. 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
  439. 'info_dict': {
  440. 'id': '1',
  441. 'title': 'Sesong 1',
  442. },
  443. 'playlist_mincount': 30,
  444. }
  445. @classmethod
  446. def suitable(cls, url):
  447. return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
  448. else super(NRKTVSeasonIE, cls).suitable(url))
  449. def _real_extract(self, url):
  450. display_id = self._match_id(url)
  451. webpage = self._download_webpage(url, display_id)
  452. series = self._extract_series(webpage, display_id)
  453. season = next(
  454. s for s in series['seasons']
  455. if int(display_id) == s.get('seasonNumber'))
  456. title = try_get(season, lambda x: x['titles']['title'], compat_str)
  457. return self.playlist_result(
  458. self._extract_episodes(season), display_id, title)
  459. class NRKTVSeriesIE(NRKTVSerieBaseIE):
  460. _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
  461. _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
  462. _TESTS = [{
  463. 'url': 'https://tv.nrk.no/serie/blank',
  464. 'info_dict': {
  465. 'id': 'blank',
  466. 'title': 'Blank',
  467. 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
  468. },
  469. 'playlist_mincount': 30,
  470. }, {
  471. # new layout, seasons
  472. 'url': 'https://tv.nrk.no/serie/backstage',
  473. 'info_dict': {
  474. 'id': 'backstage',
  475. 'title': 'Backstage',
  476. 'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
  477. },
  478. 'playlist_mincount': 60,
  479. }, {
  480. # new layout, instalments
  481. 'url': 'https://tv.nrk.no/serie/groenn-glede',
  482. 'info_dict': {
  483. 'id': 'groenn-glede',
  484. 'title': 'Grønn glede',
  485. 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
  486. },
  487. 'playlist_mincount': 10,
  488. }, {
  489. # old layout
  490. 'url': 'https://tv.nrksuper.no/serie/labyrint',
  491. 'info_dict': {
  492. 'id': 'labyrint',
  493. 'title': 'Labyrint',
  494. 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
  495. },
  496. 'playlist_mincount': 3,
  497. }, {
  498. 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
  499. 'only_matching': True,
  500. }, {
  501. 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
  502. 'only_matching': True,
  503. }, {
  504. 'url': 'https://tv.nrk.no/serie/postmann-pat',
  505. 'only_matching': True,
  506. }]
  507. @classmethod
  508. def suitable(cls, url):
  509. return (
  510. False if any(ie.suitable(url)
  511. for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
  512. else super(NRKTVSeriesIE, cls).suitable(url))
  513. def _real_extract(self, url):
  514. series_id = self._match_id(url)
  515. webpage = self._download_webpage(url, series_id)
  516. # New layout (e.g. https://tv.nrk.no/serie/backstage)
  517. series = self._extract_series(webpage, series_id, fatal=False)
  518. if series:
  519. title = try_get(series, lambda x: x['titles']['title'], compat_str)
  520. description = try_get(
  521. series, lambda x: x['titles']['subtitle'], compat_str)
  522. entries = []
  523. entries.extend(self._extract_seasons(series.get('seasons')))
  524. entries.extend(self._extract_entries(series.get('instalments')))
  525. entries.extend(self._extract_episodes(series.get('extraMaterial')))
  526. return self.playlist_result(entries, series_id, title, description)
  527. # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
  528. entries = [
  529. self.url_result(
  530. 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
  531. series=series_id, season=season_id))
  532. for season_id in re.findall(self._ITEM_RE, webpage)
  533. ]
  534. title = self._html_search_meta(
  535. 'seriestitle', webpage,
  536. 'title', default=None) or self._og_search_title(
  537. webpage, fatal=False)
  538. if title:
  539. title = self._search_regex(
  540. r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
  541. description = self._html_search_meta(
  542. 'series_description', webpage,
  543. 'description', default=None) or self._og_search_description(webpage)
  544. return self.playlist_result(entries, series_id, title, description)
class NRKTVDirekteIE(NRKTVIE):
    """Extractor for ``/direkte/<channel>`` pages; only the URL pattern, description and tests differ from NRKTVIE."""

    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
    # The path segment after /direkte/ is captured as the id.
    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://tv.nrk.no/direkte/nrk1',
        'only_matching': True,
    }, {
        'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
        'only_matching': True,
    }]
  555. class NRKPlaylistBaseIE(InfoExtractor):
  556. def _extract_description(self, webpage):
  557. pass
  558. def _real_extract(self, url):
  559. playlist_id = self._match_id(url)
  560. webpage = self._download_webpage(url, playlist_id)
  561. entries = [
  562. self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
  563. for video_id in re.findall(self._ITEM_RE, webpage)
  564. ]
  565. playlist_title = self. _extract_title(webpage)
  566. playlist_description = self._extract_description(webpage)
  567. return self.playlist_result(
  568. entries, playlist_id, playlist_title, playlist_description)
class NRKPlaylistIE(NRKPlaylistBaseIE):
    """Playlist extractor for nrk.no article pages that embed videos."""

    # Any nrk.no path with at least one directory segment, except paths
    # starting with "video" or "skole"; the last segment is the id.
    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
    # Captures data-video-id of elements whose class list contains "rich".
    _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
    _TESTS = [{
        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
        'info_dict': {
            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
            'title': 'Gjenopplev den historiske solformørkelsen',
            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
        'info_dict': {
            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
            'title': 'Rivertonprisen til Karin Fossum',
            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
        },
        'playlist_count': 2,
    }]

    def _extract_title(self, webpage):
        """Return the result of ``_og_search_title`` for the page, called with ``fatal=False``."""
        return self._og_search_title(webpage, fatal=False)

    def _extract_description(self, webpage):
        """Return the result of ``_og_search_description`` for the page."""
        return self._og_search_description(webpage)
class NRKTVEpisodesIE(NRKPlaylistBaseIE):
    """Playlist extractor for ``/program/Episodes/<series>/<id>`` listing pages."""

    _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
    # Programme ids found in data-episode attributes, using NRKTVIE's id pattern.
    _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
    _TESTS = [{
        'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
        'info_dict': {
            'id': '69031',
            'title': 'Nytt på nytt, sesong: 201210',
        },
        'playlist_count': 4,
    }]

    def _extract_title(self, webpage):
        """Return the text matched inside the page's ``<h1>`` element, searched with ``fatal=False``."""
        return self._html_search_regex(
            r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
  607. class NRKSkoleIE(InfoExtractor):
  608. IE_DESC = 'NRK Skole'
  609. _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
  610. _TESTS = [{
  611. 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
  612. 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
  613. 'info_dict': {
  614. 'id': '6021',
  615. 'ext': 'mp4',
  616. 'title': 'Genetikk og eneggede tvillinger',
  617. 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
  618. 'duration': 399,
  619. },
  620. }, {
  621. 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
  622. 'only_matching': True,
  623. }]
  624. def _real_extract(self, url):
  625. video_id = self._match_id(url)
  626. webpage = self._download_webpage(
  627. 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
  628. video_id)
  629. nrk_id = self._parse_json(
  630. self._search_regex(
  631. r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
  632. webpage, 'application json'),
  633. video_id)['activeMedia']['psId']
  634. return self.url_result('nrk:%s' % nrk_id)