Merge branch 'master' of github.com:rg3/youtube-dl

11 years ago · ef2fac6f4a
--- a/test/test_write_info_json.py
+++ b/test/test_write_info_json.py
@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.info.json'
 DESCRIPTION_FILE = TEST_ID + '.mp4.description'
 EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐
 test URL: https://github.com/rg3/youtube-dl/issues/1892

 This is a test video for youtube-dl.

--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -145,6 +145,7 @@ from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
            u"title": u"Freddie Gibbs - Lay It Down"
            u"title": u'Freddie Gibbs "Lay It Down"'
        }
    }

--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor):
            u'age_limit': 18,
        },
    },
    # cbs video
    {
        u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
        u'info_dict': {
            u'id': u'0rOxMBabDXN6',
            u'ext': u'flv',
            u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
            u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
            u'duration': 129,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    },
    ]


@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor):

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
        # the video may come from an external site
        m_external = re.match('^(\w{2})-(.*)$', video_id)
        if m_external is not None:
            prefix, ext_id = m_external.groups()
            # Check if video comes from YouTube
            if prefix == 'yt':
                return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
            # CBS videos use theplatform.com
            if prefix == 'cb':
                return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')

        # Retrieve video webpage to extract further information
        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -0,0 +1,69 @@
 import re
 import json

 from .common import InfoExtractor
 from ..utils import (
    xpath_with_ns,
    find_xpath_attr,
 )

 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})


 class ThePlatformIE(InfoExtractor):
    _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'

    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        u'info_dict': {
            u'id': u'e9I_cZgTgIPd',
            u'ext': u'flv',
            u'title': u'Blackberry\'s big, bold Z30',
            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            u'duration': 247,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _get_info(self, video_id):
        smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
            'format=smil&mbr=true'.format(video_id))
        meta = self._download_xml(smil_url, video_id)
        info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)

        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))
        base_url = head.find(_x('smil:meta')).attrib['base']
        switch = body.find(_x('smil:switch'))
        formats = []
        for f in switch.findall(_x('smil:video')):
            attr = f.attrib
            formats.append({
                'url': base_url,
                'play_path': 'mp4:' + attr['src'],
                'ext': 'flv',
                'width': int(attr['width']),
                'height': int(attr['height']),
                'vbr': int(attr['system-bitrate']),
            })
        formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))

        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
            'duration': info['duration']//1000,
        }
        
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        return self._get_info(video_id)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
    _VALID_URL = r'((http://www\.vevo\.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',