|
|
@ -975,40 +975,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |
|
|
|
return {} |
|
|
|
try: |
|
|
|
args = player_config['args'] |
|
|
|
caption_url = args['ttsurl'] |
|
|
|
if not caption_url: |
|
|
|
self._downloader.report_warning(err_msg) |
|
|
|
return {} |
|
|
|
timestamp = args['timestamp'] |
|
|
|
# We get the available subtitles |
|
|
|
list_params = compat_urllib_parse.urlencode({ |
|
|
|
'type': 'list', |
|
|
|
'tlangs': 1, |
|
|
|
'asrs': 1, |
|
|
|
}) |
|
|
|
list_url = caption_url + '&' + list_params |
|
|
|
caption_list = self._download_xml(list_url, video_id) |
|
|
|
original_lang_node = caption_list.find('track') |
|
|
|
if original_lang_node is None: |
|
|
|
self._downloader.report_warning('Video doesn\'t have automatic captions') |
|
|
|
return {} |
|
|
|
original_lang = original_lang_node.attrib['lang_code'] |
|
|
|
caption_kind = original_lang_node.attrib.get('kind', '') |
|
|
|
caption_url = args.get('ttsurl') |
|
|
|
if caption_url: |
|
|
|
timestamp = args['timestamp'] |
|
|
|
# We get the available subtitles |
|
|
|
list_params = compat_urllib_parse.urlencode({ |
|
|
|
'type': 'list', |
|
|
|
'tlangs': 1, |
|
|
|
'asrs': 1, |
|
|
|
}) |
|
|
|
list_url = caption_url + '&' + list_params |
|
|
|
caption_list = self._download_xml(list_url, video_id) |
|
|
|
original_lang_node = caption_list.find('track') |
|
|
|
if original_lang_node is None: |
|
|
|
self._downloader.report_warning('Video doesn\'t have automatic captions') |
|
|
|
return {} |
|
|
|
original_lang = original_lang_node.attrib['lang_code'] |
|
|
|
caption_kind = original_lang_node.attrib.get('kind', '') |
|
|
|
|
|
|
|
sub_lang_list = {} |
|
|
|
for lang_node in caption_list.findall('target'): |
|
|
|
sub_lang = lang_node.attrib['lang_code'] |
|
|
|
sub_formats = [] |
|
|
|
for ext in self._SUBTITLE_FORMATS: |
|
|
|
params = compat_urllib_parse.urlencode({ |
|
|
|
'lang': original_lang, |
|
|
|
'tlang': sub_lang, |
|
|
|
'fmt': ext, |
|
|
|
'ts': timestamp, |
|
|
|
'kind': caption_kind, |
|
|
|
}) |
|
|
|
sub_formats.append({ |
|
|
|
'url': caption_url + '&' + params, |
|
|
|
'ext': ext, |
|
|
|
}) |
|
|
|
sub_lang_list[sub_lang] = sub_formats |
|
|
|
return sub_lang_list |
|
|
|
|
|
|
|
# Some videos don't provide ttsurl but rather caption_tracks and |
|
|
|
# caption_translation_languages (e.g. 20LmZk1hakA) |
|
|
|
caption_tracks = args['caption_tracks'] |
|
|
|
caption_translation_languages = args['caption_translation_languages'] |
|
|
|
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] |
|
|
|
parsed_caption_url = compat_urlparse.urlparse(caption_url) |
|
|
|
caption_qs = compat_parse_qs(parsed_caption_url.query) |
|
|
|
|
|
|
|
sub_lang_list = {} |
|
|
|
for lang_node in caption_list.findall('target'): |
|
|
|
sub_lang = lang_node.attrib['lang_code'] |
|
|
|
for lang in caption_translation_languages.split(','): |
|
|
|
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) |
|
|
|
sub_lang = lang_qs.get('lc', [None])[0] |
|
|
|
if not sub_lang: |
|
|
|
continue |
|
|
|
sub_formats = [] |
|
|
|
for ext in self._SUBTITLE_FORMATS: |
|
|
|
params = compat_urllib_parse.urlencode({ |
|
|
|
'lang': original_lang, |
|
|
|
'tlang': sub_lang, |
|
|
|
'fmt': ext, |
|
|
|
'ts': timestamp, |
|
|
|
'kind': caption_kind, |
|
|
|
caption_qs.update({ |
|
|
|
'tlang': [sub_lang], |
|
|
|
'fmt': [ext], |
|
|
|
}) |
|
|
|
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( |
|
|
|
query=compat_urllib_parse.urlencode(caption_qs, True))) |
|
|
|
sub_formats.append({ |
|
|
|
'url': caption_url + '&' + params, |
|
|
|
'url': sub_url, |
|
|
|
'ext': ext, |
|
|
|
}) |
|
|
|
sub_lang_list[sub_lang] = sub_formats |
|
|
|