Browse Source

[franceculture] Add extractor for '/emission-*' urls (closes #3777, closes #8022)

totalwebcasting
flatgreen 9 years ago
committed by Jaime Marquínez Ferrándiz
parent
commit
ecf17d1653
2 changed files with 42 additions and 3 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +38
    -2
      youtube_dl/extractor/franceculture.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -203,7 +203,10 @@ from .fourtube import FourTubeIE
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .foxsports import FoxSportsIE from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
from .franceculture import (
FranceCultureIE,
FranceCultureEmissionIE,
)
from .franceinter import FranceInterIE from .franceinter import FranceInterIE
from .francetv import ( from .francetv import (
PluzzIE, PluzzIE,


+ 38
- 2
youtube_dl/extractor/franceculture.py View File

@ -8,6 +8,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
ExtractorError,
) )
@ -28,8 +29,7 @@ class FranceCultureIE(InfoExtractor):
} }
} }
def _real_extract(self, url):
video_id = self._match_id(url)
def _extract_from_player(self, url, video_id):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_path = self._search_regex( video_path = self._search_regex(
@ -42,6 +42,9 @@ class FranceCultureIE(InfoExtractor):
r'<a id="player".*?>\s+<img src="([^"]+)"', r'<a id="player".*?>\s+<img src="([^"]+)"',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
display_id = self._search_regex(
r'<span class="path-diffusion">emission-(.*?)</span>', webpage, 'display_id')
title = self._html_search_regex( title = self._html_search_regex(
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
alt_title = self._html_search_regex( alt_title = self._html_search_regex(
@ -66,4 +69,37 @@ class FranceCultureIE(InfoExtractor):
'alt_title': alt_title, 'alt_title': alt_title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': description, 'description': description,
'display_id': display_id,
} }
def _real_extract(self, url):
    """Extract the id from ``url`` and delegate to the player-page extraction.

    The id comes from ``self._match_id(url)``; the result is whatever
    ``self._extract_from_player`` returns for that URL and id.
    """
    return self._extract_from_player(url, self._match_id(url))
class FranceCultureEmissionIE(FranceCultureIE):
    """Extractor for franceculture.fr ``/emission-*`` pages.

    The page itself is only scanned for the link that opens the player
    (``<a class="rf-player-open" href="...">``); the actual extraction is
    delegated to ``_extract_from_player`` on that player URL.
    """

    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
        'info_dict': {
            'title': 'Jean-Gabriel Périot, cinéaste',
            'alt_title': 'Les Carnets de la création',
            'id': '5093239',
            'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
            'ext': 'mp3',
            'timestamp': 1444762500,
            'upload_date': '20151013',
            'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste',
        },
    }

    def _real_extract(self, url):
        """Locate the player link on an emission page and extract from it.

        Raises:
            ExtractorError: (expected) when the page has no
                ``rf-player-open`` link, i.e. no audio is attached to it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # default=None makes a missing player link come back as None instead
        # of raising, so it can be reported as an expected "no audio" error
        # without relying on a magic sentinel string.
        video_path = self._html_search_regex(
            r'<a class="rf-player-open".*?href="([^"]+)"',
            webpage, 'video path', default=None)
        if video_path is None:
            raise ExtractorError('no player : no sound in this page.', expected=True)

        # The numeric id used by the player is carried in the link's
        # ``play=`` query parameter.
        new_id = self._search_regex(
            r'play=(?P<id>[0-9]+)', video_path, 'new_id', group='id')
        # The link may be relative; resolve it against the page URL.
        video_url = compat_urlparse.urljoin(url, video_path)
        return self._extract_from_player(video_url, new_id)

Loading…
Cancel
Save