Browse Source

[raiplay:playlist] Fix issues and improve (closes #14563)

master-ytdl-org
Sergey M․ 7 years ago
parent
commit
1115271ac6
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 37 additions and 27 deletions
  1. +1
    -1
      youtube_dl/extractor/extractors.py
  2. +36
    -26
      youtube_dl/extractor/rai.py

+ 1
- 1
youtube_dl/extractor/extractors.py View File

@ -856,8 +856,8 @@ from .radiofrance import RadioFranceIE
from .rai import (
RaiPlayIE,
RaiPlayLiveIE,
RaiPlayPlaylistIE,
RaiIE,
RaiPlaylistIE,
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE


+ 36
- 26
youtube_dl/extractor/rai.py View File

@ -17,6 +17,7 @@ from ..utils import (
parse_duration,
strip_or_none,
try_get,
unescapeHTML,
unified_strdate,
unified_timestamp,
update_url_query,
@ -249,6 +250,41 @@ class RaiPlayLiveIE(RaiBaseIE):
}
class RaiPlayPlaylistIE(InfoExtractor):
    """Extractor for RaiPlay programme pages, returned as a playlist.

    Each link on the page that points at a ``/raiplay/video/`` path becomes
    one playlist entry that is delegated to ``RaiPlayIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Download the programme page and build the playlist result.

        The playlist id is the ``id`` group matched by ``_VALID_URL``; the
        title and description are looked up in the page's meta tags.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Title is looked up under either of the two programme meta names.
        title = self._html_search_meta(
            ('programma', 'nomeProgramma'), webpage, 'title')

        # The meta content may carry HTML entities, hence the unescape.
        # NOTE: no debug output here - an extractor must never write to
        # stdout, which may be carrying media data or JSON for the caller.
        description = unescapeHTML(self._html_search_meta(
            ('description', 'og:description'), webpage, 'description'))

        entries = []
        # Match anchors whose href (single- or double-quoted, closed by the
        # same quote via the \1 backreference) points at a video page.
        for mobj in re.finditer(
                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
                webpage):
            video_url = urljoin(url, mobj.group('path'))
            entries.append(self.url_result(
                video_url, ie=RaiPlayIE.ie_key(),
                video_id=RaiPlayIE._match_id(video_url)))

        return self.playlist_result(entries, playlist_id, title, description)
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
@ -455,29 +491,3 @@ class RaiIE(RaiBaseIE):
info.update(relinker_info)
return info
class RaiPlaylistIE(InfoExtractor):
    """Extractor for RaiPlay programme pages, returned as a playlist.

    Every ``href`` on the page that starts with ``/raiplay/video`` and is
    accepted by ``RaiPlayIE`` becomes one playlist entry.
    """

    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Download the programme page and build the playlist result.

        The playlist id is the ``id`` group matched by ``_VALID_URL``; the
        title comes from the ``programma`` meta tag and is ``None`` when
        that tag is absent.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_meta('programma', webpage, default=None)

        entries = []
        # Non-greedy match: stop at the first closing quote so that a line
        # holding several quoted attributes (or several links) does not get
        # swallowed into a single bogus path.
        for path in re.findall(r' href="(/raiplay/video.+?)"', webpage):
            video_url = urljoin(url, path)
            # Only keep links that RaiPlayIE itself is able to handle.
            if RaiPlayIE.suitable(video_url):
                entries.append(
                    self.url_result(video_url, RaiPlayIE.ie_key()))

        return self.playlist_result(entries, playlist_id, title)

Loading…
Cancel
Save