|
|
@ -18,6 +18,7 @@ from ..compat import ( |
|
|
|
compat_HTTPError, |
|
|
|
compat_http_client, |
|
|
|
compat_urllib_error, |
|
|
|
compat_urllib_parse, |
|
|
|
compat_urllib_parse_urlparse, |
|
|
|
compat_urllib_request, |
|
|
|
compat_urlparse, |
|
|
@ -37,6 +38,7 @@ from ..utils import ( |
|
|
|
RegexNotFoundError, |
|
|
|
sanitize_filename, |
|
|
|
unescapeHTML, |
|
|
|
url_basename, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
@ -978,69 +980,165 @@ class InfoExtractor(object): |
|
|
|
self._sort_formats(formats) |
|
|
|
return formats |
|
|
|
|
|
|
|
# TODO: improve extraction |
|
|
|
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
    # Fetch the SMIL document at `smil_url` through self._download_xml,
    # forwarding `fatal` unchanged. Only the download is visible in this
    # span; nothing is returned from the lines shown here.
    # NOTE(review): a second `_extract_smil_formats` (taking an additional
    # `f4m_params` argument) appears further down in this listing and would
    # shadow this one if both live in the same class. This looks like the
    # pre-change side of a diff -- confirm which definition is intended.
    smil = self._download_xml(
        smil_url, video_id, 'Downloading SMIL file',
        'Unable to download SMIL file', fatal=fatal)
|
|
|
@staticmethod |
|
|
|
def _xpath_ns(path, namespace=None): |
|
|
|
if not namespace: |
|
|
|
return path |
|
|
|
out = [] |
|
|
|
for c in path.split('/'): |
|
|
|
if not c or c == '.': |
|
|
|
out.append(c) |
|
|
|
else: |
|
|
|
out.append('{%s}%s' % (namespace, c)) |
|
|
|
return '/'.join(out) |
|
|
|
|
|
|
|
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
    """Download a SMIL document and return the formats parsed from it.

    smil_url   -- URL of the SMIL file
    video_id   -- id passed through to the download and parsing helpers
    fatal      -- forwarded to self._download_smil
    f4m_params -- forwarded to self._parse_smil_formats

    Returns the list produced by self._parse_smil_formats, or an empty
    list when the download result is False.
    """
    smil = self._download_smil(smil_url, video_id, fatal=fatal)

    if smil is False:
        # A False result is only expected for a non-fatal download
        assert not fatal
        return []

    # The base URL is resolved inside _parse_smil_formats in a
    # namespace-aware way; an un-namespaced find('./head/meta').get('base')
    # here was unused and raised AttributeError whenever find() returned
    # None (no such element, or a namespaced document).
    namespace = self._search_regex(
        r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)

    return self._parse_smil_formats(
        smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
|
|
|
|
|
|
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
    """Download a SMIL document and return what self._parse_smil makes of it.

    An empty dict is returned when the download result is False;
    otherwise the document is handed to self._parse_smil together with
    `smil_url`, `video_id` and `f4m_params`.
    """
    document = self._download_smil(smil_url, video_id, fatal=fatal)
    if document is not False:
        return self._parse_smil(document, smil_url, video_id, f4m_params=f4m_params)
    return {}
|
|
|
|
|
|
|
def _download_smil(self, smil_url, video_id, fatal=True):
    """Fetch `smil_url` via self._download_xml with SMIL-specific messages.

    `fatal` is forwarded unchanged and the result of self._download_xml
    is returned as is.
    """
    note = 'Downloading SMIL file'
    errnote = 'Unable to download SMIL file'
    return self._download_xml(smil_url, video_id, note, errnote, fatal=fatal)
|
|
|
|
|
|
|
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
    """Turn a parsed SMIL document into a dict of id/title/description/
    formats/subtitles.

    The namespace is read from the root tag and used for all lookups.
    Formats are parsed with the `video_id` passed in; the returned 'id'
    is instead the basename of `smil_url` without its extension. Title
    and description come from the first <head><meta> elements named
    'title' and 'description'/'abstract' that have non-empty content;
    the title falls back to the id.
    """
    namespace = self._search_regex(
        r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)

    formats = self._parse_smil_formats(
        smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
    subtitles = self._parse_smil_subtitles(smil, namespace=namespace)

    # From here on the id is derived from the SMIL URL, not the argument
    video_id = os.path.splitext(url_basename(smil_url))[0]

    title = description = None
    for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
        name, content = meta.attrib.get('name'), meta.attrib.get('content')
        if not (name and content):
            continue
        # First match wins for each field
        if name == 'title' and not title:
            title = content
        elif name in ('description', 'abstract') and not description:
            description = content

    info = {
        'id': video_id,
        'title': title or video_id,
        'description': description,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
|
|
|
|
|
|
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
    """Build the formats list from every <video> element of a SMIL document.

    smil       -- parsed SMIL root element
    smil_url   -- URL of the document; used as base URL unless a
                  <head><meta> element provides 'base' or 'httpBase'
    video_id   -- id forwarded to the m3u8/f4m extraction helpers
    namespace  -- XML namespace applied to all element lookups
    f4m_params -- query parameters appended to f4m manifest URLs; when
                  falsy, a default hdcore/plugin pair is used

    Each <video> with a 'src' is visited exactly once and classified as
    RTMP, HLS (m3u8), HDS (f4m) or plain HTTP; anything else is ignored.
    The list is passed through self._sort_formats before being returned.
    """
    # The first <head><meta> carrying base/httpBase overrides the SMIL URL
    base = smil_url
    for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
        b = meta.get('base') or meta.get('httpBase')
        if b:
            base = b
            break

    formats = []
    rtmp_count = 0
    http_count = 0

    # A single namespace-aware pass over all <video> elements. A separate
    # pre-pass over ./body/seq/video and ./body/switch/video would emit the
    # same elements a second time (they are also matched by .//video) and
    # would miss namespaced documents entirely.
    for video in smil.findall(self._xpath_ns('.//video', namespace)):
        src = video.get('src')
        if not src:
            continue

        # NOTE(review): the second argument of int_or_none is presumably a
        # scale divisor (bits/s -> kbit/s) -- confirm against utils
        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        filesize = int_or_none(video.get('size') or video.get('fileSize'))
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))
        proto = video.get('proto')
        ext = video.get('ext')
        src_ext = determine_ext(src)
        streamer = video.get('streamer') or base

        if proto == 'rtmp' or streamer.startswith('rtmp'):
            rtmp_count += 1
            formats.append({
                'url': streamer,
                'play_path': src,
                'ext': 'flv',
                # Fall back to a running counter when no bitrate is known
                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                'tbr': bitrate,
                'filesize': filesize,
                'width': width,
                'height': height,
            })
            continue

        src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)

        if proto == 'm3u8' or src_ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                src_url, video_id, ext or 'mp4', m3u8_id='hls'))
            continue

        if src_ext == 'f4m':
            f4m_url = src_url
            if not f4m_params:
                f4m_params = {
                    'hdcore': '3.2.0',
                    'plugin': 'flowplayer-3.2.0.1',
                }
            f4m_url += '&' if '?' in f4m_url else '?'
            # urlencode() already yields a text string; encoding it to
            # bytes makes this concatenation a TypeError on Python 3
            f4m_url += compat_urllib_parse.urlencode(f4m_params)
            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
            continue

        if src_url.startswith('http'):
            http_count += 1
            formats.append({
                'url': src_url,
                'ext': ext or src_ext or 'flv',
                'format_id': 'http-%d' % (bitrate or http_count),
                'tbr': bitrate,
                'filesize': filesize,
                'width': width,
                'height': height,
            })

    self._sort_formats(formats)

    return formats
|
|
|
|
|
|
|
def _parse_smil_video(self, video, video_id, base, rtmp_count): |
|
|
|
src = video.get('src') |
|
|
|
if not src: |
|
|
|
return [], rtmp_count |
|
|
|
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) |
|
|
|
width = int_or_none(video.get('width')) |
|
|
|
height = int_or_none(video.get('height')) |
|
|
|
proto = video.get('proto') |
|
|
|
if not proto: |
|
|
|
if base: |
|
|
|
if base.startswith('rtmp'): |
|
|
|
proto = 'rtmp' |
|
|
|
elif base.startswith('http'): |
|
|
|
proto = 'http' |
|
|
|
ext = video.get('ext') |
|
|
|
if proto == 'm3u8': |
|
|
|
return self._extract_m3u8_formats(src, video_id, ext), rtmp_count |
|
|
|
elif proto == 'rtmp': |
|
|
|
rtmp_count += 1 |
|
|
|
streamer = video.get('streamer') or base |
|
|
|
return ([{ |
|
|
|
'url': streamer, |
|
|
|
'play_path': src, |
|
|
|
'ext': 'flv', |
|
|
|
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), |
|
|
|
'tbr': bitrate, |
|
|
|
'width': width, |
|
|
|
'height': height, |
|
|
|
}], rtmp_count) |
|
|
|
elif proto.startswith('http'): |
|
|
|
return ([{ |
|
|
|
'url': base + src, |
|
|
|
'ext': ext or 'flv', |
|
|
|
'tbr': bitrate, |
|
|
|
'width': width, |
|
|
|
'height': height, |
|
|
|
}], rtmp_count) |
|
|
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
    """Collect subtitle tracks from the <textstream> elements of a SMIL
    document.

    smil           -- parsed SMIL root element
    namespace      -- XML namespace applied to the element lookup
    subtitles_lang -- language key used for tracks that declare neither
                      'systemLanguage' nor 'systemLanguageName', so that
                      such tracks are never grouped under a None key

    Returns a dict mapping language to a list of {'url', 'ext'} dicts.
    Elements without a 'src' are skipped.
    """
    subtitles = {}
    for textstream in smil.findall(self._xpath_ns('.//textstream', namespace)):
        src = textstream.get('src')
        if not src:
            continue
        ext = textstream.get('ext') or determine_ext(src)
        # Last resort: derive the extension from the declared MIME type
        if not ext and textstream.get('type') == 'text/srt':
            ext = 'srt'
        lang = (
            textstream.get('systemLanguage') or
            textstream.get('systemLanguageName') or
            subtitles_lang)
        subtitles.setdefault(lang, []).append({
            'url': src,
            'ext': ext,
        })
    return subtitles
|
|
|
|
|
|
|
def _live_title(self, name): |
|
|
|
""" Generate the title for a live video """ |
|
|
|