|
|
@ -7,8 +7,10 @@ from .common import InfoExtractor |
|
|
|
|
|
|
|
from ..compat import compat_str |
|
|
|
from ..utils import ( |
|
|
|
float_or_none, |
|
|
|
int_or_none, |
|
|
|
try_get, |
|
|
|
url_or_none, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor): |
|
|
|
''' |
|
|
|
_TESTS = [{ |
|
|
|
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', |
|
|
|
'md5': '0de43ac406aa3e4ea74b66c9c7789b13', |
|
|
|
'md5': 'b0ce2b05ca215042124fbc9e3886493a', |
|
|
|
'info_dict': { |
|
|
|
'id': '102', |
|
|
|
'ext': 'mp4', |
|
|
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor): |
|
|
|
'uploader': 'Dan Dennett', |
|
|
|
'width': 853, |
|
|
|
'duration': 1308, |
|
|
|
} |
|
|
|
'view_count': int, |
|
|
|
'comment_count': int, |
|
|
|
'tags': list, |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
}, { |
|
|
|
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', |
|
|
|
'md5': 'b899ac15e345fb39534d913f7606082b', |
|
|
|
# missing HTTP bitrates |
|
|
|
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms', |
|
|
|
'info_dict': { |
|
|
|
'id': 'tSVI8ta_P4w', |
|
|
|
'id': '6069', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'Vishal Sikka: The beauty and power of algorithms', |
|
|
|
'title': 'The beauty and power of algorithms', |
|
|
|
'thumbnail': r're:^https?://.+\.jpg', |
|
|
|
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4', |
|
|
|
'upload_date': '20140122', |
|
|
|
'uploader_id': 'TEDInstitute', |
|
|
|
'uploader': 'TED Institute', |
|
|
|
'description': 'md5:734e352710fb00d840ab87ae31aaf688', |
|
|
|
'uploader': 'Vishal Sikka', |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
'add_ie': ['Youtube'], |
|
|
|
}, { |
|
|
|
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best', |
|
|
|
'md5': '71b3ab2f4233012dce09d515c9c39ce2', |
|
|
|
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0', |
|
|
|
'info_dict': { |
|
|
|
'id': '1972', |
|
|
|
'ext': 'mp4', |
|
|
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor): |
|
|
|
'description': 'md5:5174aed4d0f16021b704120360f72b92', |
|
|
|
'duration': 1128, |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
}, { |
|
|
|
'url': 'http://www.ted.com/playlists/who_are_the_hackers', |
|
|
|
'info_dict': { |
|
|
@ -91,22 +102,6 @@ class TEDIE(InfoExtractor): |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
}, { |
|
|
|
# YouTube video |
|
|
|
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', |
|
|
|
'add_ie': ['Youtube'], |
|
|
|
'info_dict': { |
|
|
|
'id': 'aFBIPO-P7LM', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', |
|
|
|
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', |
|
|
|
'uploader': 'TEDx Talks', |
|
|
|
'uploader_id': 'TEDxTalks', |
|
|
|
'upload_date': '20111216', |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
}, { |
|
|
|
# no nativeDownloads |
|
|
|
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth', |
|
|
@ -116,6 +111,9 @@ class TEDIE(InfoExtractor): |
|
|
|
'title': 'The orchestra in my mouth', |
|
|
|
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a', |
|
|
|
'uploader': 'Tom Thum', |
|
|
|
'view_count': int, |
|
|
|
'comment_count': int, |
|
|
|
'tags': list, |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
@ -174,24 +172,11 @@ class TEDIE(InfoExtractor): |
|
|
|
|
|
|
|
info = self._extract_info(webpage) |
|
|
|
|
|
|
|
talk_info = try_get( |
|
|
|
info, lambda x: x['__INITIAL_DATA__']['talks'][0], |
|
|
|
dict) or info['talks'][0] |
|
|
|
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info |
|
|
|
talk_info = data['talks'][0] |
|
|
|
|
|
|
|
title = talk_info['title'].strip() |
|
|
|
|
|
|
|
external = talk_info.get('external') |
|
|
|
if external: |
|
|
|
service = external['service'] |
|
|
|
self.to_screen('Found video from %s' % service) |
|
|
|
ext_url = None |
|
|
|
if service.lower() == 'youtube': |
|
|
|
ext_url = external.get('code') |
|
|
|
return { |
|
|
|
'_type': 'url', |
|
|
|
'url': ext_url or external['uri'], |
|
|
|
} |
|
|
|
|
|
|
|
native_downloads = try_get( |
|
|
|
talk_info, |
|
|
|
(lambda x: x['downloads']['nativeDownloads'], |
|
|
@ -211,10 +196,24 @@ class TEDIE(InfoExtractor): |
|
|
|
|
|
|
|
player_talk = talk_info['player_talks'][0] |
|
|
|
|
|
|
|
external = player_talk.get('external') |
|
|
|
if isinstance(external, dict): |
|
|
|
service = external.get('service') |
|
|
|
if isinstance(service, compat_str): |
|
|
|
ext_url = None |
|
|
|
if service.lower() == 'youtube': |
|
|
|
ext_url = external.get('code') |
|
|
|
return { |
|
|
|
'_type': 'url', |
|
|
|
'url': ext_url or external['uri'], |
|
|
|
} |
|
|
|
|
|
|
|
resources_ = player_talk.get('resources') or talk_info.get('resources') |
|
|
|
|
|
|
|
http_url = None |
|
|
|
for format_id, resources in resources_.items(): |
|
|
|
if not isinstance(resources, dict): |
|
|
|
continue |
|
|
|
if format_id == 'h264': |
|
|
|
for resource in resources: |
|
|
|
h264_url = resource.get('file') |
|
|
@ -243,8 +242,12 @@ class TEDIE(InfoExtractor): |
|
|
|
'tbr': int_or_none(resource.get('bitrate')), |
|
|
|
}) |
|
|
|
elif format_id == 'hls': |
|
|
|
stream_url = url_or_none(resources.get('stream')) |
|
|
|
if not stream_url: |
|
|
|
continue |
|
|
|
formats.extend(self._extract_m3u8_formats( |
|
|
|
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False)) |
|
|
|
stream_url, video_name, 'mp4', m3u8_id=format_id, |
|
|
|
fatal=False)) |
|
|
|
|
|
|
|
m3u8_formats = list(filter( |
|
|
|
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', |
|
|
@ -254,9 +257,13 @@ class TEDIE(InfoExtractor): |
|
|
|
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) |
|
|
|
if not bitrate: |
|
|
|
continue |
|
|
|
bitrate_url = re.sub(r'\d+k', bitrate, http_url) |
|
|
|
if not self._is_valid_url( |
|
|
|
bitrate_url, video_name, '%s bitrate' % bitrate): |
|
|
|
continue |
|
|
|
f = m3u8_format.copy() |
|
|
|
f.update({ |
|
|
|
'url': re.sub(r'\d+k', bitrate, http_url), |
|
|
|
'url': bitrate_url, |
|
|
|
'format_id': m3u8_format['format_id'].replace('hls', 'http'), |
|
|
|
'protocol': 'http', |
|
|
|
}) |
|
|
@ -282,7 +289,11 @@ class TEDIE(InfoExtractor): |
|
|
|
'description': self._og_search_description(webpage), |
|
|
|
'subtitles': self._get_subtitles(video_id, talk_info), |
|
|
|
'formats': formats, |
|
|
|
'duration': talk_info.get('duration'), |
|
|
|
'duration': float_or_none(talk_info.get('duration')), |
|
|
|
'view_count': int_or_none(data.get('viewed_count')), |
|
|
|
'comment_count': int_or_none( |
|
|
|
try_get(data, lambda x: x['comments']['count'])), |
|
|
|
'tags': try_get(talk_info, lambda x: x['tags'], list), |
|
|
|
} |
|
|
|
|
|
|
|
def _get_subtitles(self, video_id, talk_info): |
|
|
|