Browse Source

[nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258)

master-ytdl-org
Sergey M․ 6 years ago
parent
commit
15699ec8b0
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
1 changed files with 43 additions and 25 deletions
  1. +43
    -25
      youtube_dl/extractor/nrk.py

+ 43
- 25
youtube_dl/extractor/nrk.py View File

@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'duration': 263,
'duration': 262,
}
}, {
# audio
@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '4e9ca6629f09e588ed240fb11619922a',
'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
'series': '20 spørsmål - TV',
'series': '20 spørsmål',
'episode': '23.05.2014',
},
}, {
@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'description': 'md5:c03aba1e917561eface5214020551b7a',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'description': 'md5:c03aba1e917561eface5214020551b7a',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:c03aba1e917561eface5214020551b7a',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
},
'expected_warnings': ['Video is geo restricted'],
}, {
@ -406,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
default='{}' if not fatal else NO_DEFAULT),
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False)
if not config:
return
return try_get(config, lambda x: x['series'], dict)
return try_get(
config,
(lambda x: x['initialState']['series'], lambda x: x['series']),
dict)
def _extract_seasons(self, seasons):
if not isinstance(seasons, list):
return []
entries = []
for season in seasons:
entries.extend(self._extract_episodes(season))
return entries
def _extract_episodes(self, season):
entries = []
if not isinstance(season, dict):
return entries
episodes = season.get('episodes')
if not isinstance(episodes, list):
return entries
for episode in episodes:
return []
return self._extract_entries(season.get('episodes'))
def _extract_entries(self, entry_list):
if not isinstance(entry_list, list):
return []
entries = []
for episode in entry_list:
nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str):
continue
@ -465,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{
# new layout
# new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
@ -474,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
},
'playlist_mincount': 60,
}, {
# old layout
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 9,
'playlist_mincount': 10,
}, {
'url': 'http://tv.nrksuper.no/serie/labyrint',
# old layout
'url': 'https://tv.nrksuper.no/serie/labyrint',
'info_dict': {
'id': 'labyrint',
'title': 'Labyrint',
'description': 'md5:58afd450974c89e27d5a19212eee7115',
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
},
'playlist_mincount': 3,
}, {
@ -520,11 +535,11 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str)
entries = []
for season in series['seasons']:
entries.extend(self._extract_episodes(season))
entries.extend(self._extract_seasons(series.get('seasons')))
entries.extend(self._extract_entries(series.get('instalments')))
return self.playlist_result(entries, series_id, title, description)
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
entries = [
self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
@ -536,6 +551,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'seriestitle', webpage,
'title', default=None) or self._og_search_title(
webpage, fatal=False)
if title:
title = self._search_regex(
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
description = self._html_search_meta(
'series_description', webpage,
@ -596,7 +614,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
'title': 'Rivertonprisen til Karin Fossum',
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
},
'playlist_count': 5,
'playlist_count': 2,
}]
def _extract_title(self, webpage):


Loading…
Cancel
Save