@ -0,0 +1,80 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
int_or_none, | |||
parse_iso8601, | |||
sanitized_Request, | |||
) | |||
class AudiMediaIE(InfoExtractor):
    """Extractor for videos on audimedia.tv.

    Stage modes 's' and 'e' (live and ended live streams, per the TODO
    below) are not handled yet.
    """

    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
        'md5': '79a8b71c46d49042609795ab59779b66',
        'info_dict': {
            'id': '1564',
            'ext': 'mp4',
            'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
            'description': 'md5:60e5d30a78ced725f7b8d34370762941',
            'upload_date': '20151124',
            'timestamp': 1448354940,
            'duration': 74022,
            'view_count': int,
        }
    }
    # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
    _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'

    def _real_extract(self, url):
        """Return the info dict for the video embedded at ``url``.

        The page embeds a ``<script class="amtv-embed">`` element whose ``id``
        has four dash-separated parts; the second is the stage mode, the third
        the numeric video id and the fourth the language. The metadata is then
        fetched from the JSON API using ``_AUTH_TOKEN``.

        Returns ``None`` (implicitly) for the unsupported 's'/'e' stage modes.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        raw_payload = self._search_regex(
            r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"',
            webpage, 'raw payload')
        _, stage_mode, video_id, lang = raw_payload.split('-')

        # TODO: handle s and e stage_mode (live streams and ended live streams)
        if stage_mode not in ('s', 'e'):
            request = sanitized_Request(
                'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
                headers={'X-Auth-Token': self._AUTH_TOKEN})
            json_data = self._download_json(request, video_id)['results']
            formats = []

            stream_url_hls = json_data.get('stream_url_hls')
            if stream_url_hls:
                m3u8_formats = self._extract_m3u8_formats(
                    stream_url_hls, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)

            stream_url_hds = json_data.get('stream_url_hds')
            if stream_url_hds:
                f4m_formats = self._extract_f4m_formats(
                    stream_url_hds + '?hdcore=3.4.0',
                    video_id, -1, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)

            # The key may be absent or null; iterate over nothing in that case
            # instead of raising TypeError.
            for video_version in json_data.get('video_versions') or []:
                video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                if not video_version_url:
                    continue
                formats.append({
                    'url': video_version_url,
                    'width': int_or_none(video_version.get('width')),
                    'height': int_or_none(video_version.get('height')),
                    'abr': int_or_none(video_version.get('audio_bitrate')),
                    'vbr': int_or_none(video_version.get('video_bitrate')),
                })

            self._sort_formats(formats)

            return {
                'id': video_id,
                'title': json_data['title'],
                'description': json_data.get('subtitle'),
                # `or {}` also covers an explicit null, which a .get() default does not.
                'thumbnail': (json_data.get('thumbnail_image') or {}).get('file'),
                'timestamp': parse_iso8601(json_data.get('publication_date')),
                'duration': int_or_none(json_data.get('duration')),
                'view_count': int_or_none(json_data.get('view_count')),
                'formats': formats,
            }
@ -0,0 +1,57 @@ | |||
from __future__ import unicode_literals | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
float_or_none, | |||
parse_iso8601, | |||
) | |||
class ClypIE(InfoExtractor):
    """Extractor for clyp.it audio, using the api.clyp.it JSON endpoint."""

    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'https://clyp.it/ojz2wfah',
        'md5': '1d4961036c41247ecfdcc439c0cddcbb',
        'info_dict': {
            'id': 'ojz2wfah',
            'ext': 'mp3',
            'title': 'Krisson80 - bits wip wip',
            'description': '#Krisson80BitsWipWip #chiptune\n#wip',
            'duration': 263.21,
            'timestamp': 1443515251,
            'upload_date': '20150929',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the clip identified by ``url``.

        The API response is probed for the keys ``OggUrl``, ``Mp3Url``,
        ``SecureOggUrl`` and ``SecureMp3Url`` (in that order); every non-empty
        one becomes an audio-only format.
        """
        audio_id = self._match_id(url)
        info = self._download_json('https://api.clyp.it/%s' % audio_id, audio_id)

        # Plain variants first, then the 'Secure' ones, each as Ogg then Mp3.
        candidate_ids = [
            '%s%s' % (prefix, codec)
            for prefix in ('', 'Secure')
            for codec in ('Ogg', 'Mp3')]

        formats = []
        for candidate in candidate_ids:
            media_url = info.get('%sUrl' % candidate)
            if not media_url:
                continue
            formats.append({
                'url': media_url,
                'format_id': candidate,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        return {
            'id': audio_id,
            'title': info['Title'],
            'description': info.get('Description'),
            'duration': float_or_none(info.get('Duration')),
            'timestamp': parse_iso8601(info.get('DateCreated')),
            'formats': formats,
        }
@ -0,0 +1,88 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
import os.path | |||
from .common import InfoExtractor | |||
from ..compat import compat_urlparse | |||
from ..utils import ( | |||
url_basename, | |||
remove_start, | |||
) | |||
class DemocracynowIE(InfoExtractor):
    """Extractor for democracynow.org show and story pages."""

    # The dot before "org" is escaped so that only the real host name matches.
    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
    IE_NAME = 'democracynow'
    _TESTS = [{
        'url': 'http://www.democracynow.org/shows/2015/7/3',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': 'July 03, 2015 - Democracy Now!',
            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
        },
    }, {
        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict built from the page's embedded ``text/json`` script.

        Formats come from the ``file``, ``audio`` and ``video`` keys of that
        JSON; subtitles from ``caption_file`` and the ``captions`` list. The
        video id is derived from the first media file name (without extension
        and without a leading ``dn``), falling back to the URL path.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        description = self._og_search_description(webpage)

        json_data = self._parse_json(self._search_regex(
            r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
            display_id)

        video_id = None
        formats = []
        default_lang = 'en'
        subtitles = {}

        def add_subtitle_item(lang, info_dict):
            # Group subtitle entries per language, creating the list on first use.
            subtitles.setdefault(lang, []).append(info_dict)

        # chapter_file are not subtitles
        if 'caption_file' in json_data:
            add_subtitle_item(default_lang, {
                'url': compat_urlparse.urljoin(url, json_data['caption_file']),
            })

        for subtitle_item in json_data.get('captions', []):
            # `or ''` guards against an explicit null language as well as a missing key.
            lang = (subtitle_item.get('language') or '').lower() or default_lang
            add_subtitle_item(lang, {
                'url': compat_urlparse.urljoin(url, subtitle_item['url']),
            })

        for key in ('file', 'audio', 'video'):
            media_url = json_data.get(key, '')
            if not media_url:
                continue
            # Resolve against the page URL and drop any query string.
            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
            formats.append({
                'url': media_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id or display_id,
            'title': json_data['title'],
            'description': description,
            'subtitles': subtitles,
            'formats': formats,
        }
@ -0,0 +1,51 @@ | |||
# encoding: utf-8 | |||
from __future__ import unicode_literals | |||
import time | |||
from .common import InfoExtractor | |||
from ..utils import int_or_none | |||
class DPlayIE(InfoExtractor):
    """Extractor for dplay.se episodes (HLS streams only)."""

    _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
        'info_dict': {
            'id': '3172',
            'ext': 'mp4',
            'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
            'title': 'Svensken lär sig njuta av livet',
            'duration': 2650,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at ``url``.

        The numeric video id is read from the page's ``data-video-id``
        attribute, metadata from the ``/api/v2/ajax/videos`` endpoint, and the
        HLS manifest URL from the secure authorization endpoint after a
        ``dsc-geo`` cookie has been set.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'data-video-id="(\d+)"', webpage, 'video id')

        info = self._download_json(
            'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
            video_id)['data'][0]

        # Cookie carries a hard-coded country code and an expiry 20 minutes
        # from now, expressed in milliseconds.
        self._set_cookie(
            'secure.dplay.se', 'dsc-geo',
            '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
        # TODO: consider adding support for 'stream_type=hds', it seems to
        # require setting some cookies
        manifest_url = self._download_json(
            'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
            video_id, 'Getting manifest url for hls stream')['hls']
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
        # Order the formats like the other extractors in this package do.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': info['title'],
            'formats': formats,
            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
        }
@ -1,39 +1,92 @@ | |||
# encoding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from .brightcove import BrightcoveIE | |||
from ..utils import ExtractorError | |||
from ..utils import ( | |||
float_or_none, | |||
int_or_none, | |||
parse_iso8601, | |||
sanitized_Request, | |||
) | |||
class EitbIE(InfoExtractor):
    """Extractor for eitb.tv videos, using the mam.eitb.eus REST service."""

    IE_NAME = 'eitb.tv'
    _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
        'md5': 'edf4436247185adee3ea18ce64c47998',
        'info_dict': {
            'id': '4090227752001',
            'ext': 'mp4',
            'title': '60 minutos (Lasa y Zabala, 30 años)',
            'description': 'Programa de reportajes de actualidad.',
            'duration': 3996.76,
            'timestamp': 1381789200,
            'upload_date': '20131014',
            'tags': list,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video whose id is the last number in ``url``.

        Formats are collected from three places in the first ``web_media``
        entry: the progressive ``RENDITIONS`` list, the HLS manifest
        (``HLS_SURL``, which needs an auth token) and the HDS manifest
        (``HDS_SURL``). Token and manifest failures are non-fatal.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
            video_id, 'Downloading video JSON')

        media = video['web_media'][0]

        formats = []
        for rendition in media['RENDITIONS']:
            video_url = rendition.get('PMD_URL')
            if not video_url:
                continue
            tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
            format_id = 'http'
            if tbr:
                format_id += '-%d' % int(tbr)
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': int_or_none(rendition.get('FRAME_WIDTH')),
                'height': int_or_none(rendition.get('FRAME_HEIGHT')),
                'tbr': tbr,
            })

        hls_url = media.get('HLS_SURL')
        if hls_url:
            # The token endpoint is queried with the page URL as Referer; the
            # token is then appended to the manifest URL as `hdnts`.
            request = sanitized_Request(
                'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                headers={'Referer': url})
            token_data = self._download_json(
                request, video_id, 'Downloading auth token', fatal=False)
            if token_data:
                token = token_data.get('token')
                if token:
                    m3u8_formats = self._extract_m3u8_formats(
                        '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)
                    if m3u8_formats:
                        formats.extend(m3u8_formats)

        hds_url = media.get('HDS_SURL')
        if hds_url:
            # NOTE(review): the 'euskalsvod' -> 'euskalvod' rewrite presumably
            # selects a host that does not require the token - confirm.
            f4m_formats = self._extract_f4m_formats(
                '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
                video_id, f4m_id='hds', fatal=False)
            if f4m_formats:
                formats.extend(f4m_formats)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
            'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
            'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
            'duration': float_or_none(media.get('LENGTH'), 1000),
            'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
            'tags': media.get('TAGS'),
            'formats': formats,
        }
@ -1,19 +1,62 @@ | |||
from __future__ import unicode_literals | |||
from .mtv import MTVServicesInfoExtractor | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
int_or_none, | |||
parse_age_limit, | |||
url_basename, | |||
) | |||
class GametrailersIE(InfoExtractor):
    """Extractor for gametrailers.com ``/videos/view/`` pages."""

    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'

    _TEST = {
        'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
        'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
        'info_dict': {
            'id': '2983958',
            'ext': 'mp4',
            'display_id': '116437-Just-Cause-3-Review',
            'title': 'Just Cause 3 - Review',
            'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        The page references an embed page on embed.gametrailers.com; that
        embed page defines a JavaScript ``embedVars`` object whose ``media``
        list provides the formats (only entries with ``mediaPurpose`` equal
        to ``'play'`` are used). The video id is the last path component of
        the embed URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_regex(
            r'<title>(.+?)\|', webpage, 'title').strip()
        embed_url = self._proto_relative_url(
            self._search_regex(
                r'src=\'(//embed\.gametrailers\.com/embed/[^\']+)\'', webpage,
                'embed url'),
            scheme='http:')
        video_id = url_basename(embed_url)
        embed_page = self._download_webpage(embed_url, video_id)
        embed_vars_json = self._search_regex(
            r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
            'embed vars')
        info = self._parse_json(embed_vars_json, video_id)

        formats = []
        for media in info['media']:
            if media['mediaPurpose'] == 'play':
                formats.append({
                    'url': media['uri'],
                    'height': media['height'],
                    # The key must be exactly 'width' to be recognised as a
                    # format field; it previously carried a stray colon.
                    'width': media['width'],
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': info.get('thumbUri'),
            'description': self._og_search_description(webpage),
            'duration': int_or_none(info.get('videoLengthInSeconds')),
            'age_limit': parse_age_limit(info.get('audienceRating')),
        }
@ -1,64 +1,169 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import compat_urlparse | |||
from ..utils import ( | |||
determine_ext, | |||
int_or_none, | |||
parse_duration, | |||
parse_iso8601, | |||
xpath_text, | |||
) | |||
class MDRIE(InfoExtractor):
    """Extractor for mdr.de and kika.de video/audio pages."""

    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'

    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
        'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
        'info_dict': {
            'id': '1312272',
            'ext': 'mp3',
            'title': 'Feuilleton vom 30. Oktober 2015',
            'duration': 250,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
        'md5': '4930515e36b06c111213e80d1e4aad0e',
        'info_dict': {
            'id': '19636',
            'ext': 'mp4',
            'title': 'Baumhaus vom 30. Oktober 2015',
            'duration': 134,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
        'info_dict': {
            'id': '8182',
            'ext': 'mp4',
            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            'timestamp': 1419047100,
            'upload_date': '20141220',
            'duration': 4628,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict described by the page's ``-avCustom.xml`` document.

        The page's ``dataURL`` points at an XML document. Each
        ``./assets/asset`` element may carry a progressive download URL, an
        HDS redirector URL and an HLS redirector URL; each distinct URL is
        turned into one or more formats. Assets without a video bitrate are
        marked audio-only.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data_url = self._search_regex(
            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
            webpage, 'data url', group='url')

        doc = self._download_xml(
            compat_urlparse.urljoin(url, data_url), video_id)

        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)

        formats = []
        # URLs already converted to formats; the same URL can be listed by
        # more than one asset/source.
        processed_urls = set()
        for asset in doc.findall('./assets/asset'):
            for source in (
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
                url_el = asset.find('./%sUrl' % source)
                if url_el is None:
                    continue

                video_url = url_el.text
                # An empty element has no text; there is nothing to extract.
                if not video_url or video_url in processed_urls:
                    continue

                processed_urls.add(video_url)

                # Bitrates are divided by 1000 via int_or_none's scale argument.
                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)

                ext = determine_ext(video_url)
                if ext == 'm3u8':
                    url_formats = self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        preference=0, m3u8_id='HLS', fatal=False)
                elif ext == 'f4m':
                    url_formats = self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                        preference=0, f4m_id='HDS', fatal=False)
                else:
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))

                    # Only append a bitrate suffix when one is known; '%d'
                    # cannot format None.
                    bitrate = vbr or abr
                    if bitrate is None:
                        format_id = media_type
                    else:
                        format_id = '%s-%d' % (media_type, bitrate)

                    f = {
                        'url': video_url,
                        'format_id': format_id,
                        'filesize': filesize,
                        'abr': abr,
                        'preference': 1,
                    }

                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
                            'vbr': vbr,
                            'width': width,
                            'height': height,
                        })

                    url_formats = [f]

                if not url_formats:
                    continue

                if not vbr:
                    # No video bitrate: mark every format of this asset as
                    # audio-only and move its total bitrate (if any) to 'abr'.
                    # The fallback is the asset's own abr for each format, so
                    # one format's tbr never leaks into the next.
                    for f in url_formats:
                        f.update({
                            'abr': f.pop('tbr', None) or abr,
                            'vcodec': 'none',
                        })

                formats.extend(url_formats)

        self._sort_formats(formats)

        description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
        timestamp = parse_iso8601(
            xpath_text(
                doc, [
                    './broadcast/broadcastDate',
                    './broadcast/broadcastStartDate',
                    './broadcast/broadcastEndDate'],
                'timestamp', default=None))
        duration = parse_duration(xpath_text(doc, './duration', 'duration'))
        uploader = xpath_text(doc, './rights', 'uploader')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
            'uploader': uploader,
            'formats': formats,
        }
@ -1,80 +1,40 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import ( | |||
compat_str, | |||
) | |||
from ..utils import ( | |||
ExtractorError, | |||
clean_html, | |||
) | |||
from ..utils import sanitized_Request | |||
class MovieClipsIE(InfoExtractor):
    """Extractor for movieclips.com ``/videos/`` pages; playback is delegated
    to the player.theplatform.com URL embedded in the page."""

    _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
        'info_dict': {
            'id': 'pKIGmG83AqD9',
            'display_id': 'warcraft-trailer-1-561180739597',
            'ext': 'mp4',
            'title': 'Warcraft Trailer 1',
            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the embedded
        theplatform player, with title, display id and description taken
        from the movieclips page itself."""
        display_id = self._match_id(url)

        req = sanitized_Request(url)
        # The site does not work if it thinks the browser is too old, so a
        # recent-looking User-Agent is sent.
        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
        webpage = self._download_webpage(req, display_id)
        theplatform_link = self._html_search_regex(
            r'src="(http://player\.theplatform\.com/p/.*?)"', webpage, 'theplatform link')
        # NOTE(review): the unescaped `|` makes this an alternation of two
        # patterns rather than a literal pipe; left unchanged because the real
        # page title layout cannot be confirmed from here.
        title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
        description = self._html_search_meta('description', webpage)

        return {
            '_type': 'url_transparent',
            'url': theplatform_link,
            'title': title,
            'display_id': display_id,
            'description': description,
        }