|
|
@ -1,6 +1,7 @@ |
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
from .subtitles import SubtitlesInfoExtractor |
|
|
|
from .common import InfoExtractor |
|
|
|
from ..utils import ( |
|
|
|
fix_xml_ampersands, |
|
|
|
parse_duration, |
|
|
@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor): |
|
|
|
|
|
|
|
class NPOIE(NPOBaseIE): |
|
|
|
IE_NAME = 'npo.nl' |
|
|
|
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' |
|
|
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)' |
|
|
|
|
|
|
|
_TESTS = [ |
|
|
|
{ |
|
|
@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE): |
|
|
|
|
|
|
|
class NPOLiveIE(NPOBaseIE): |
|
|
|
IE_NAME = 'npo.nl:live' |
|
|
|
_VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)' |
|
|
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)' |
|
|
|
|
|
|
|
_TEST = { |
|
|
|
'url': 'http://www.npo.nl/live/npo-1', |
|
|
@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE): |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
class NPORadioIE(InfoExtractor):
    """Extractor for live radio channel pages under npo.nl/radio/<channel>."""

    IE_NAME = 'npo.nl:radio'
    # Matches only the channel page itself (optional trailing slash, then
    # end of string); deeper paths such as .../fragment/<id> do not match.
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-1',
        'info_dict': {
            'id': 'radio-1',
            'ext': 'mp3',
            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    @staticmethod
    def _html_get_attribute_regex(attribute):
        """Return a regex capturing the single-quoted value of *attribute*.

        The attribute name is inserted into the pattern verbatim (it is not
        regex-escaped), and only values delimited by single quotes match.
        """
        template = r'{0}\s*=\s*\'([^\']+)\''
        return template.format(attribute)

    def _real_extract(self, url):
        """Build the info dict for the live radio channel page at *url*."""
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        # The channel's display name is read from the data-channel attribute.
        channel_name = self._html_search_regex(
            self._html_get_attribute_regex('data-channel'), page, 'title')

        # data-streams holds a JSON document; the code below reads its
        # 'url' (required) and 'codec' (optional) keys.
        streams_json = self._html_search_regex(
            self._html_get_attribute_regex('data-streams'),
            page, 'data-streams')
        stream_info = self._parse_json(streams_json, channel_id)

        # The codec name is reported both as the audio codec and as the
        # file extension; it is None when the JSON has no 'codec' key.
        audio_codec = stream_info.get('codec')

        return {
            'id': channel_id,
            'url': stream_info['url'],
            'title': self._live_title(channel_name),
            'acodec': audio_codec,
            'ext': audio_codec,
            'is_live': True,
        }
|
|
|
|
|
|
|
|
|
|
|
class NPORadioFragmentIE(InfoExtractor):
    """Extractor for radio fragments at npo.nl/radio/<channel>/fragment/<id>."""

    IE_NAME = 'npo.nl:radio:fragment'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
        'info_dict': {
            'id': '174356',
            'ext': 'mp3',
            'title': 'Jubileumconcert Willeke Alberti',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the radio fragment page at *url*."""
        fragment_id = self._match_id(url)
        page = self._download_webpage(url, fragment_id)

        # The title comes from the title attribute of the link that points
        # at this very fragment (matched by its numeric id).
        title_pattern = (
            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % fragment_id)
        fragment_title = self._html_search_regex(
            title_pattern, page, 'title')

        # The single-quoted data-streams attribute value is used as the
        # media URL as-is, without any further parsing.
        stream_pattern = r"data-streams='([^']+)'"
        stream_url = self._search_regex(stream_pattern, page, 'audio url')

        return {
            'id': fragment_id,
            'url': stream_url,
            'title': fragment_title,
        }
|
|
|
|
|
|
|
|
|
|
|
class TegenlichtVproIE(NPOIE): |
|
|
|
IE_NAME = 'tegenlicht.vpro.nl' |
|
|
|
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' |
|
|
|