|
|
@ -0,0 +1,221 @@ |
|
|
|
# coding: utf-8 |
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from ..compat import ( |
|
|
|
compat_str, |
|
|
|
compat_urllib_parse_unquote, |
|
|
|
) |
|
|
|
from ..utils import ( |
|
|
|
int_or_none, |
|
|
|
parse_iso8601, |
|
|
|
strip_or_none, |
|
|
|
try_get, |
|
|
|
unescapeHTML, |
|
|
|
urljoin, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class KinjaEmbedIE(InfoExtractor):
    """Extractor for media embedded through Kinja inset/video iframes.

    The ``id`` query parameter of such an iframe URL has the form
    ``<type>-<id>``.  Handling depends on ``<type>``:

    * types listed in ``_PROVIDER_MAP`` are rewritten to a URL on the
      provider's own site and handed over via ``url_result``;
    * ``kinjavideo`` is resolved through the Kinja video API;
    * any other type accepted by ``_VALID_URL`` (i.e. ``mcp``) is resolved
      through the Univision metadata and video-auth APIs.
    """

    # The base class reads ``IE_NAME``; the original code only defined the
    # misspelled ``IENAME``, so the intended name was never picked up.
    IE_NAME = 'kinja:embed'
    # Legacy misspelling, kept so that anything referencing it keeps working.
    IENAME = IE_NAME

    # NOTE: _DOMAIN_REGEX and _COMMON_REGEX contain layout whitespace and are
    # only valid when embedded in a pattern compiled in verbose ``(?x)`` mode.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # Embed type -> (scheme-less URL prefix or %-template, second argument
    # given to ``url_result``).  Entries containing ``%s`` are templates that
    # _real_extract fills from a two-part ID; all others get the ID appended.
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return the URLs of all Kinja inset iframes found in ``webpage``.

        Each ``src`` attribute value is HTML-unescaped and then resolved
        against ``url`` with ``urljoin``; the pattern accepts values both
        with and without a leading ``//<kinja domain>`` part.
        """
        return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
            # \1 refers to the opening quote captured by group ``q``
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
            webpage)]

    def _real_extract(self, url):
        """Resolve a Kinja inset iframe URL.

        Returns a ``url_result`` for types present in ``_PROVIDER_MAP`` and
        an info dict (id, title, formats, ...) for ``kinjavideo`` and for
        the remaining ``mcp`` type.
        """
        video_type, video_id = re.match(self._VALID_URL, url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            video_id = compat_urllib_parse_unquote(video_id)
            if video_type == 'tumblr-post':
                # ID is "<post id>-<rest>"; the part after the first dash is
                # used as the tumblr subdomain
                video_id, blog = video_id.split('-', 1)
                result_url = provider[0] % (blog, video_id)
            elif video_type == 'youtube-list':
                # ID is "<video id>/<playlist id>"
                video_id, playlist_id = video_id.split('/')
                result_url = provider[0] % (video_id, playlist_id)
            else:
                if video_type == 'ooyala':
                    # only the part before the first slash is used as the
                    # embed code
                    video_id = video_id.split('/')[0]
                result_url = provider[0] + video_id
            return self.url_result('http://' + result_url, provider[1])

        if video_type == 'kinjavideo':
            data = self._download_json(
                'https://kinja.com/api/core/video/views/videoById',
                video_id, query={'videoId': video_id})['data']
            title = data['title']

            formats = []
            # both 'signedPlaylistUrl' and 'streamingUrl' are treated as HLS
            # manifests; a failing one is skipped (fatal=False)
            for k in ('signedPlaylist', 'streaming'):
                m3u8_url = data.get(k + 'Url')
                if m3u8_url:
                    formats.extend(self._extract_m3u8_formats(
                        m3u8_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
            self._sort_formats(formats)

            thumbnail = None
            poster = data.get('poster') or {}
            poster_id = poster.get('id')
            if poster_id:
                # file extension falls back to 'jpg' when no format is given
                thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')

            return {
                'id': video_id,
                'title': title,
                'description': strip_or_none(data.get('description')),
                'formats': formats,
                'tags': data.get('tags'),
                # 'publishTimeMillis' is passed through int_or_none with 1000
                # as its second argument (presumably milliseconds -> seconds)
                'timestamp': int_or_none(try_get(
                    data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
                'thumbnail': thumbnail,
                'uploader': data.get('network'),
            }
        else:
            # Only 'mcp' reaches this branch: every other type accepted by
            # _VALID_URL is either in _PROVIDER_MAP or is 'kinjavideo'.
            video_data = self._download_json(
                'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
                video_id)['videoMetadata']
            iptc = video_data['photoVideoMetadataIPTC']
            title = iptc['title']['en']
            fmg = video_data.get('photoVideoMetadata_fmg') or {}
            # the metadata may name its own auth host; otherwise use the
            # default one
            tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
            data = self._download_json(
                tvss_domain + '/api/v3/video-auth/url-signature-tokens',
                video_id, query={'mcpids': video_id})['data'][0]
            formats = []

            rendition_url = data.get('renditionUrl')
            if rendition_url:
                formats = self._extract_m3u8_formats(
                    rendition_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False)

            fallback_rendition_url = data.get('fallbackRenditionUrl')
            if fallback_rendition_url:
                formats.append({
                    'format_id': 'fallback',
                    # bitrate is taken from a "_<digits>.mp4" suffix in the
                    # URL when there is one
                    'tbr': int_or_none(self._search_regex(
                        r'_(\d+)\.mp4', fallback_rendition_url,
                        'bitrate', default=None)),
                    'url': fallback_rendition_url,
                })

            self._sort_formats(formats)

            return {
                'id': video_id,
                'title': title,
                'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
                'uploader': fmg.get('network'),
                'duration': int_or_none(iptc.get('fileDuration')),
                'formats': formats,
                'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
                'timestamp': parse_iso8601(iptc.get('dateReleased')),
            }