[nationalgeographic] Add extractor (closes #4960)

10 years ago · 6140baf4e1
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -285,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import NationalGeographicIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@ -0,0 +1,38 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    smuggle_url,
    url_basename,
 )


 class NationalGeographicIE(InfoExtractor):
    """Extractor for video pages on video.nationalgeographic.com.

    The page markup is searched for a ``data-feed-url`` and a
    ``data-video-guid`` attribute.  The feed is then queried by GUID, and the
    basename of its ``media:content`` URL is used as the id in a
    link.theplatform.com URL, which is handed off through ``url_result``.
    """

    # Accept both http and https links to the same video pages.
    _VALID_URL = r'https?://video\.nationalgeographic\.com/video/.*?'

    _TEST = {
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'info_dict': {
            'id': '4DmDACA6Qtk_',
            'ext': 'flv',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Resolve a video page URL to a smuggled link.theplatform.com URL.

        Raises ExtractorError when the feed entry has no ``media:content``
        element or that element carries no ``url`` attribute.
        """
        # The last path component of the page URL doubles as the display name
        # passed to the download helpers.
        name = url_basename(url)

        webpage = self._download_webpage(url, name)
        feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
        guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')

        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
        # find() yields None when nothing matches; fail with a clear message
        # instead of an AttributeError on ``None.attrib``.
        content_url = content.attrib.get('url') if content is not None else None
        if not content_url:
            raise ExtractorError(
                'Unable to find media content URL in feed for guid %s' % guid)
        theplatform_id = url_basename(content_url)

        return self.url_result(smuggle_url(
            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
            # For some reason, the normal links don't work and we must force the use of f4m
            {'force_smil_url': True}))
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
        if not provider_id:
            provider_id = 'dJ5BDC'

        if mobj.group('config'):
        if smuggled_data.get('force_smil_url', False):
            smil_url = url
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')