Browse Source

[nbc] Add an extractor for the main nbc.com site

Some of the videos are encrypted, the f4m downloader doesn’t support them.
totalwebcasting
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
020cf5ebfd
3 changed files with 38 additions and 5 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +24
    -0
      youtube_dl/extractor/nbc.py
  3. +10
    -4
      youtube_dl/extractor/theplatform.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -152,7 +152,10 @@ from .myspass import MySpassIE
from .myvideo import MyVideoIE from .myvideo import MyVideoIE
from .naver import NaverIE from .naver import NaverIE
from .nba import NBAIE from .nba import NBAIE
from .nbc import NBCNewsIE
from .nbc import (
NBCIE,
NBCNewsIE,
)
from .ndr import NDRIE from .ndr import NDRIE
from .ndtv import NDTVIE from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE


+ 24
- 0
youtube_dl/extractor/nbc.py View File

@ -6,6 +6,30 @@ from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str from ..utils import find_xpath_attr, compat_str
class NBCIE(InfoExtractor):
_VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'
_TEST = {
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
'info_dict': {
'id': 'u1RInQZRN7QJ',
'ext': 'flv',
'title': 'I Am a Firefighter',
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
return self.url_result(theplatform_url)
class NBCNewsIE(InfoExtractor): class NBCNewsIE(InfoExtractor):
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'


+ 10
- 4
youtube_dl/extractor/theplatform.py View File

@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
class ThePlatformIE(InfoExtractor): class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
(?P<config>[^/\?]+/(?:swf|config)/select/)?
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)''' |theplatform:)(?P<id>[^/\?&]+)'''
_TEST = { _TEST = {
@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor):
f4m_node = body.find(_x('smil:seq/smil:video')) f4m_node = body.find(_x('smil:seq/smil:video'))
if f4m_node is not None: if f4m_node is not None:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
f4m_url += '?'
# the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
formats = [{ formats = [{
'ext': 'flv', 'ext': 'flv',
# the parameters are from syfy.com, other sites may use others
'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3',
'url': f4m_url,
}] }]
else: else:
base_url = head.find(_x('smil:meta')).attrib['base'] base_url = head.find(_x('smil:meta')).attrib['base']
@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor):
if mobj.group('config'): if mobj.group('config'):
config_url = url+ '&form=json' config_url = url+ '&form=json'
config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config_json = self._download_webpage(config_url, video_id, u'Downloading config') config_json = self._download_webpage(config_url, video_id, u'Downloading config')
config = json.loads(config_json) config = json.loads(config_json)
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4'
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else: else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id)) 'format=smil&mbr=true'.format(video_id))


Loading…
Cancel
Save