From 05d0d131a76e34af843d982dd47b556ddc8b9e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 20:05:56 +0800 Subject: [PATCH 01/12] [youtube] Move decrypt_sig out of _parse_dash_manifest --- youtube_dl/extractor/youtube.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a24c73584..08b7e15c4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1035,22 +1035,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') - def _parse_dash_manifest( - self, video_id, dash_manifest_url, player_url, age_gate, fatal=True): - def decrypt_sig(mobj): - s = mobj.group(1) - dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) - return '/signature/%s' % dec_s - dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url) - dash_doc = self._download_xml( - dash_manifest_url, video_id, - note='Downloading DASH manifest', - errnote='Could not download DASH manifest', - fatal=fatal) - - if dash_doc is False: - return [] - + def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): formats = [] for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): mime_type = a.attrib.get('mimeType') @@ -1533,8 +1518,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for dash_manifest_url in dash_mpds: dash_formats = {} try: - for df in self._parse_dash_manifest( - video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal): + def decrypt_sig(mobj): + s = mobj.group(1) + dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) + return '/signature/%s' % dec_s + + dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url) + dash_doc = self._download_xml( + dash_manifest_url, video_id, + note='Downloading DASH manifest', + errnote='Could not download DASH manifest', + fatal=dash_mpd_fatal) + + for df in self._parse_dash_manifest(video_id, dash_doc, dash_mpd_fatal): # Do not overwrite DASH format found in some previous DASH manifest if df['format_id'] not in dash_formats: dash_formats[df['format_id']] = df From 17b598d30cae2c287f3556f874ddf0fc5d028aec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:05:55 +0800 Subject: [PATCH 02/12] [common] _parse_dash_manifest() from youtube.py --- youtube_dl/extractor/common.py | 52 +++++++++++++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 52 --------------------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b3d57dfce..7ad255672 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,6 +1330,58 @@ class InfoExtractor(object): }) return entries + def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): + formats = [] + for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + mime_type = a.attrib.get('mimeType') + for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + if mime_type == 'text/vtt': + # TODO implement WebVTT downloading + pass + elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + if segment_list is not None: + f.update({ + 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'protocol': 'http_dash_segments', + }) + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + codecs = r.attrib.get('codecs') + if codecs: + if full_info.get('acodec') == 'none': + full_info['vcodec'] = codecs + elif full_info.get('vcodec') == 'none': + full_info['acodec'] = codecs + formats.append(full_info) + else: + existing_format.update(f) + else: + self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 08b7e15c4..acd5cf2c3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1035,58 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') - def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): - formats = [] - for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): - mime_type = a.attrib.get('mimeType') - for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - if mime_type == 'text/vtt': - # TODO implement WebVTT downloading - pass - elif mime_type.startswith('audio/') or mime_type.startswith('video/'): - segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'height': int_or_none(r.attrib.get('height')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - if segment_list is not None: - f.update({ - 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], - 'protocol': 'http_dash_segments', - }) - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - full_info = self._formats.get(format_id, {}).copy() - full_info.update(f) - codecs = r.attrib.get('codecs') - if codecs: - if full_info.get('acodec') == 'none': - full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none': - full_info['acodec'] = codecs - formats.append(full_info) - else: - existing_format.update(f) - else: - self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - return formats - def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From b323e1707d8f058b88a5f15f3418b31cf969399d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:27:43 +0800 Subject: [PATCH 03/12] [common] Modify _parse_dash_manifest for use in Facebook --- youtube_dl/extractor/common.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7ad255672..83628a68f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,22 +1330,24 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): + def _parse_dash_manifest(self, video_id, dash_doc, default_ns='urn:mpeg:DASH:schema:MPD:2011', formats_dict={}, fatal=True): + def _add_ns(tag): + return '{%s}%s' % (default_ns, tag) + formats = [] - for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')): mime_type = a.attrib.get('mimeType') - for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue + for r in a.findall(_add_ns('Representation')): + mime_type = r.attrib.get('mimeType') or mime_type + url_el = r.find(_add_ns('BaseURL')) if mime_type == 'text/vtt': # TODO implement WebVTT downloading pass elif mime_type.startswith('audio/') or mime_type.startswith('video/'): - segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') + segment_list = r.find(_add_ns('SegmentList')) format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + video_url = url_el.text if url_el else None + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el else None) f = { 'format_id': format_id, 'url': video_url, @@ -1357,17 +1359,20 @@ class InfoExtractor(object): 'fps': int_or_none(r.attrib.get('frameRate')), } if segment_list is not None: + initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL'] f.update({ - 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'initialization_url': initialization_url, + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))], 'protocol': 'http_dash_segments', }) + if not f.get('url'): + f['url'] = initialization_url try: existing_format = next( fo for fo in formats if fo['format_id'] == format_id) except StopIteration: - full_info = self._formats.get(format_id, {}).copy() + full_info = formats_dict.get(format_id, {}).copy() full_info.update(f) codecs = r.attrib.get('codecs') if codecs: From 0803753fea578355db68ae9f2d915db04cde7557 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:31:53 +0800 Subject: [PATCH 04/12] [facebook] Add support for DASH manifests --- youtube_dl/extractor/facebook.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 899b0896b..1f3e270d9 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -6,9 +6,11 @@ import socket from .common import InfoExtractor from ..compat import ( + compat_etree_fromstring, compat_http_client, compat_urllib_error, compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, ) from ..utils import ( error_to_compat_str, @@ -44,6 +46,9 @@ class FacebookIE(InfoExtractor): _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' IE_NAME = 'facebook' + + _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' + _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'md5': '6a40d33c0eccbb1af76cf0485a052659', @@ -65,6 +70,15 @@ class FacebookIE(InfoExtractor): 'expected_warnings': [ 'title' ] + }, { + 'note': 'Video with DASH manifest', + 'url': 'https://www.facebook.com/video.php?v=957955867617029', + 'info_dict': { + 'id': '957955867617029', + 'ext': 'mp4', + 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', + 'uploader': 'Demy de Zeeuw', + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -147,8 +161,9 @@ class FacebookIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://www.facebook.com/video/video.php?v=%s' % video_id - webpage = self._download_webpage(url, video_id) + req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id) + req.add_header('User-Agent', self._CHROME_USER_AGENT) + webpage = self._download_webpage(req, video_id) video_data = None @@ -197,9 +212,16 @@ class FacebookIE(InfoExtractor): 'url': src, 'preference': -10 if format_id == 'progressive' else 0, }) + dash_manifest = f[0].get('dash_manifest') + if dash_manifest: + formats.extend(self._parse_dash_manifest( + video_id, compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)), + default_ns='urn:mpeg:dash:schema:mpd:2011')) if not formats: raise ExtractorError('Cannot find video formats') + self._sort_formats(formats) + video_title = self._html_search_regex( r']*class="uiHeaderTitle"[^>]*>([^<]*)', webpage, 'title', default=None) From 5d2c0fd9ba2251a5c91b7d8b1dd285b427a2251e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:32:15 +0800 Subject: [PATCH 05/12] [youtube] Pass self._formats to _parse_dash_manifest --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index acd5cf2c3..c1d570cb3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1478,7 +1478,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Could not download DASH manifest', fatal=dash_mpd_fatal) - for df in self._parse_dash_manifest(video_id, dash_doc, dash_mpd_fatal): + for df in self._parse_dash_manifest( + video_id, dash_doc, formats_dict=self._formats, fatal=dash_mpd_fatal): # Do not overwrite DASH format found in some previous DASH manifest if df['format_id'] not in dash_formats: dash_formats[df['format_id']] = df From 5ea1eb78f5e8d9b2ede35504dc3b999f5f89bbaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:36:01 +0800 Subject: [PATCH 06/12] [common] Fix for youtube --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 83628a68f..243db71dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1346,8 +1346,8 @@ class InfoExtractor(object): elif mime_type.startswith('audio/') or mime_type.startswith('video/'): segment_list = r.find(_add_ns('SegmentList')) format_id = r.attrib['id'] - video_url = url_el.text if url_el else None - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el else None) + video_url = url_el.text if url_el is not None else None + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) f = { 'format_id': format_id, 'url': video_url, From df374b52228e8a083d045f9bc56847e418ef452e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:42:27 +0800 Subject: [PATCH 07/12] [common] Prefer the manifest than formats_dict in determining codecs --- youtube_dl/extractor/common.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 243db71dc..f1313ef04 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1376,10 +1376,15 @@ class InfoExtractor(object): full_info.update(f) codecs = r.attrib.get('codecs') if codecs: - if full_info.get('acodec') == 'none': - full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none': - full_info['acodec'] = codecs + if mime_type.startswith('video/'): + vcodec, acodec = codecs, 'none' + else: # mime_type.startswith('audio/') + vcodec, acodec = 'none', codecs + + full_info.update({ + 'vcodec': vcodec, + 'acodec': acodec, + }) formats.append(full_info) else: existing_format.update(f) From a6c2c24479e5f4827ceb06f64d855329c0a6f593 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 22:28:53 +0800 Subject: [PATCH 08/12] [youtube] Remove '(v|a)codec': 'none' entries Not used anymore --- youtube_dl/extractor/youtube.py | 80 ++++++++++++++++----------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c1d570cb3..f992c3624 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -316,55 +316,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, # DASH mp4 video - '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Dash mp4 audio - '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, - '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, - '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, # Dash webm - '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40}, - '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, - '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, - '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, - '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, - '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, # Dash webm audio - '171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, - '172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, + '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, # Dash webm audio with opus inside - '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, - '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50}, - '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50}, + '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, + '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50}, + '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50}, # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, From 16f38a699f7c6d2820720d704a2373ba28c568b1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 22:40:13 +0800 Subject: [PATCH 09/12] [common] Rename to namespace For consistency with _parse_smil_* --- youtube_dl/extractor/common.py | 6 +++--- youtube_dl/extractor/facebook.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f1313ef04..a05efec9e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,9 +1330,9 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, default_ns='urn:mpeg:DASH:schema:MPD:2011', formats_dict={}, fatal=True): - def _add_ns(tag): - return '{%s}%s' % (default_ns, tag) + def _parse_dash_manifest(self, video_id, dash_doc, namespace=None, formats_dict={}, fatal=True): + def _add_ns(path): + return self._xpath_ns(path, namespace) formats = [] for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')): diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 1f3e270d9..af38eaff1 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -216,7 +216,7 @@ class FacebookIE(InfoExtractor): if dash_manifest: formats.extend(self._parse_dash_manifest( video_id, compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)), - default_ns='urn:mpeg:dash:schema:mpd:2011')) + namespace='urn:mpeg:dash:schema:mpd:2011')) if not formats: raise ExtractorError('Cannot find video formats') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f992c3624..8d3fcfa5f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1479,7 +1479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): fatal=dash_mpd_fatal) for df in self._parse_dash_manifest( - video_id, dash_doc, formats_dict=self._formats, fatal=dash_mpd_fatal): + video_id, dash_doc, namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats, fatal=dash_mpd_fatal): # Do not overwrite DASH format found in some previous DASH manifest if df['format_id'] not in dash_formats: dash_formats[df['format_id']] = df From c94678957fbe4483b2b7c8b3e6824cb7a215d42d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 22:45:16 +0800 Subject: [PATCH 10/12] [common] Remove unused arguments --- youtube_dl/extractor/common.py | 2 +- youtube_dl/extractor/facebook.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a05efec9e..5a2b7a721 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,7 +1330,7 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, namespace=None, formats_dict={}, fatal=True): + def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}): def _add_ns(path): return self._xpath_ns(path, namespace) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index af38eaff1..bd65e43f2 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -215,7 +215,7 @@ class FacebookIE(InfoExtractor): dash_manifest = f[0].get('dash_manifest') if dash_manifest: formats.extend(self._parse_dash_manifest( - video_id, compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)), + compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)), namespace='urn:mpeg:dash:schema:mpd:2011')) if not formats: raise ExtractorError('Cannot find video formats') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8d3fcfa5f..a2f776050 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1479,7 +1479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): fatal=dash_mpd_fatal) for df in self._parse_dash_manifest( - video_id, dash_doc, namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats, fatal=dash_mpd_fatal): + dash_doc, namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats): # Do not overwrite DASH format found in some previous DASH manifest if df['format_id'] not in dash_formats: dash_formats[df['format_id']] = df From 2d2fa82d172a10a49fb5449fa35bc409de778f05 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 22:52:23 +0800 Subject: [PATCH 11/12] [common] Add _extract_dash_manifest_formats --- youtube_dl/extractor/common.py | 15 +++++++++++++++ youtube_dl/extractor/youtube.py | 12 ++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a2b7a721..199a04d1c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,6 +1330,21 @@ class InfoExtractor(object): }) return entries + def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True): + return self._download_xml( + dash_manifest_url, video_id, + note='Downloading DASH manifest', + errnote='Could not download DASH manifest', + fatal=fatal) + + def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}): + dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal) + if dash_doc is False: + return [] + + return self._parse_dash_manifest( + dash_doc, namespace=namespace, formats_dict=formats_dict) + def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}): def _add_ns(path): return self._xpath_ns(path, namespace) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a2f776050..d6fef39e9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1472,14 +1472,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return '/signature/%s' % dec_s dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url) - dash_doc = self._download_xml( - dash_manifest_url, video_id, - note='Downloading DASH manifest', - errnote='Could not download DASH manifest', - fatal=dash_mpd_fatal) - - for df in self._parse_dash_manifest( - dash_doc, namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats): + + for df in self._extract_dash_manifest_formats( + dash_manifest_url, video_id, fatal=dash_mpd_fatal, + namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats): # Do not overwrite DASH format found in some previous DASH manifest if df['format_id'] not in dash_formats: dash_formats[df['format_id']] = df From 248ae880b6900a753e38253f149c013c3f95c0c5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 23:01:19 +0800 Subject: [PATCH 12/12] [facebook] Add md5 for the test case with DASH --- youtube_dl/extractor/facebook.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index bd65e43f2..b6d1180f0 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -73,6 +73,7 @@ class FacebookIE(InfoExtractor): }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', + 'md5': '54706e4db4f5ad58fbad82dde1f1213f', 'info_dict': { 'id': '957955867617029', 'ext': 'mp4',