Browse Source

[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.
rtmp_test
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
648d25d43d
2 changed files with 48 additions and 18 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +44
    -17
      youtube_dl/extractor/francetv.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -29,7 +29,10 @@ from .escapist import EscapistIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .francetv import PluzzIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
)
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE


+ 44
- 17
youtube_dl/extractor/francetv.py View File

@ -8,7 +8,29 @@ from ..utils import (
) )
class PluzzIE(InfoExtractor):
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
xml_desc = self._download_webpage(
'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
video_url = video_url.replace('/z/', '/i/')
thumbnail_path = info.find('image').text
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
'description': info.find('synopsis').text,
}
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr' IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
@ -29,22 +51,27 @@ class PluzzIE(InfoExtractor):
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
video_id = self._search_regex( video_id = self._search_regex(
r'data-diffusion="(\d+)"', webpage, 'ID') r'data-diffusion="(\d+)"', webpage, 'ID')
return self._extract_video(video_id)
xml_desc = self._download_webpage(
'http://www.pluzz.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, title, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
video_url = video_url.replace('/z/', '/i/')
thumbnail_path = info.find('image').text
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin(url, thumbnail_path),
'description': info.find('synopsis').text,
}
_TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
u'file': u'84981923.mp4',
u'info_dict': {
u'title': u'Soir 3',
},
u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
return self._extract_video(video_id)

Loading…
Cancel
Save