|
|
@ -4,102 +4,83 @@ from __future__ import unicode_literals |
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from ..compat import compat_urllib_parse_urlparse |
|
|
|
from ..utils import ( |
|
|
|
ExtractorError, |
|
|
|
qualities, |
|
|
|
unified_strdate, |
|
|
|
clean_html, |
|
|
|
) |
|
|
|
from ..utils import int_or_none |
|
|
|
|
|
|
|
|
|
|
|
class UltimediaIE(InfoExtractor): |
|
|
|
_VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)' |
|
|
|
_VALID_URL = r'https?://(?:www\.)?ultimedia\.com/deliver/(?P<type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+)' |
|
|
|
_TESTS = [{ |
|
|
|
# news |
|
|
|
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', |
|
|
|
'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01601930/zone/1/src/s8uk0r/autoplay/yes/ad/no/width/714/height/435', |
|
|
|
'md5': '276a0e49de58c7e85d32b057837952a2', |
|
|
|
'info_dict': { |
|
|
|
'id': 's8uk0r', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', |
|
|
|
'description': 'md5:3e5c8fd65791487333dda5db8aed32af', |
|
|
|
'thumbnail': 're:^https?://.*\.jpg', |
|
|
|
'duration': 74, |
|
|
|
'upload_date': '20150317', |
|
|
|
'timestamp': 1426604939, |
|
|
|
'uploader_id': '3fszv', |
|
|
|
}, |
|
|
|
}, { |
|
|
|
# music |
|
|
|
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8', |
|
|
|
'url': 'https://www.ultimedia.com/deliver/musique/iframe/mdtk/01601930/zone/1/article/xvpfp8/autoplay/yes/ad/no/width/714/height/435', |
|
|
|
'md5': '2ea3513813cf230605c7e2ffe7eca61c', |
|
|
|
'info_dict': { |
|
|
|
'id': 'xvpfp8', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': "Two - C'est la vie (Clip)", |
|
|
|
'description': 'Two', |
|
|
|
'title': 'Two - C\'est La Vie (clip)', |
|
|
|
'thumbnail': 're:^https?://.*\.jpg', |
|
|
|
'duration': 233, |
|
|
|
'upload_date': '20150224', |
|
|
|
'timestamp': 1424760500, |
|
|
|
'uploader_id': '3rfzk', |
|
|
|
}, |
|
|
|
}] |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
video_id = self._match_id(url) |
|
|
|
webpage = self._download_webpage(url, video_id) |
|
|
|
@staticmethod |
|
|
|
def _extract_url(webpage): |
|
|
|
mobj = re.search( |
|
|
|
r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)', |
|
|
|
webpage) |
|
|
|
if mobj: |
|
|
|
return mobj.group('url') |
|
|
|
|
|
|
|
deliver_url = self._proto_relative_url(self._search_regex( |
|
|
|
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"', |
|
|
|
webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':') |
|
|
|
def _real_extract(self, url): |
|
|
|
video_type, video_id = re.match(self._VALID_URL, url).groups() |
|
|
|
|
|
|
|
deliver_page = self._download_webpage( |
|
|
|
deliver_url, video_id, 'Downloading iframe page') |
|
|
|
deliver_info = self._download_json( |
|
|
|
'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type), |
|
|
|
video_id) |
|
|
|
|
|
|
|
if '>This video is currently not available' in deliver_page: |
|
|
|
raise ExtractorError( |
|
|
|
'Video %s is currently not available' % video_id, expected=True) |
|
|
|
yt_id = deliver_info.get('yt_id') |
|
|
|
if yt_id: |
|
|
|
return self.url_result(yt_id, 'Youtube') |
|
|
|
|
|
|
|
player = self._parse_json( |
|
|
|
self._search_regex( |
|
|
|
r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", |
|
|
|
deliver_page, 'player'), |
|
|
|
video_id) |
|
|
|
jwconf = deliver_info['jwconf'] |
|
|
|
|
|
|
|
quality = qualities(['flash', 'html5']) |
|
|
|
formats = [] |
|
|
|
for mode in player['modes']: |
|
|
|
video_url = mode.get('config', {}).get('file') |
|
|
|
if not video_url: |
|
|
|
continue |
|
|
|
if re.match(r'https?://www\.youtube\.com/.+?', video_url): |
|
|
|
return self.url_result(video_url, 'Youtube') |
|
|
|
for source in jwconf['playlist'][0]['sources']: |
|
|
|
formats.append({ |
|
|
|
'url': video_url, |
|
|
|
'format_id': mode.get('type'), |
|
|
|
'quality': quality(mode.get('type')), |
|
|
|
'url': source['file'], |
|
|
|
'format_id': source.get('label'), |
|
|
|
}) |
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
thumbnail = player.get('image') |
|
|
|
|
|
|
|
title = clean_html(( |
|
|
|
self._html_search_regex( |
|
|
|
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>', |
|
|
|
webpage, 'title', default=None) or |
|
|
|
self._search_regex( |
|
|
|
r"var\s+nameVideo\s*=\s*'([^']+)'", |
|
|
|
deliver_page, 'title'))) |
|
|
|
|
|
|
|
description = clean_html(self._html_search_regex( |
|
|
|
r'(?s)<span>Description</span>(.+?)</p>', webpage, |
|
|
|
'description', fatal=False)) |
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
upload_date = unified_strdate(self._search_regex( |
|
|
|
r'Ajouté le\s*<span>([^<]+)', webpage, |
|
|
|
'upload date', fatal=False)) |
|
|
|
title = deliver_info['title'] |
|
|
|
thumbnail = jwconf.get('image') |
|
|
|
duration = int_or_none(deliver_info.get('duration')) |
|
|
|
timestamp = int_or_none(deliver_info.get('release_time')) |
|
|
|
uploader_id = deliver_info.get('owner_id') |
|
|
|
|
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|
'thumbnail': thumbnail, |
|
|
|
'upload_date': upload_date, |
|
|
|
'duration': duration, |
|
|
|
'timestamp': timestamp, |
|
|
|
'uploader_id': uploader_id, |
|
|
|
'formats': formats, |
|
|
|
} |