Browse Source

[cbsnews] add support for live videos(fixes #7010)

totalwebcasting
remitamine 9 years ago
parent
commit
fd3a1f3d60
2 changed files with 48 additions and 9 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +44
    -8
      youtube_dl/extractor/cbsnews.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .canvas import CanvasIE from .canvas import CanvasIE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbsnews import (
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE from .cbssports import CBSSportsIE
from .ccc import CCCIE from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE


+ 44
- 8
youtube_dl/extractor/cbsnews.py View File

@ -1,15 +1,14 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import parse_duration
class CBSNewsIE(ThePlatformIE): class CBSNewsIE(ThePlatformIE):
IE_DESC = 'CBS News' IE_DESC = 'CBS News'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
_TESTS = [ _TESTS = [
{ {
@ -48,14 +47,13 @@ class CBSNewsIE(ThePlatformIE):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_info = json.loads(self._html_search_regex(
video_info = self._parse_json(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'', r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
webpage, 'video JSON info'))
webpage, 'video JSON info'), video_id)
item = video_info['item'] if 'item' in video_info else video_info item = video_info['item'] if 'item' in video_info else video_info
title = item.get('articleTitle') or item.get('hed') title = item.get('articleTitle') or item.get('hed')
@ -88,3 +86,41 @@ class CBSNewsIE(ThePlatformIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
} }
class CBSNewsLiveVideoIE(InfoExtractor):
IE_DESC = 'CBS News Live Videos'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
_TEST = {
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
'info_dict': {
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
'ext': 'flv',
'title': 'Clinton, Sanders Prepare To Face Off In NH',
'duration': 334,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(self._html_search_regex(
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
hdcore_sign = 'hdcore=3.3.1'
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
if f4m_formats:
for entry in f4m_formats:
# URLs without the extra param induce an 404 error
entry.update({'extra_param_to_segment_url': hdcore_sign})
return {
'id': video_id,
'title': video_info['headline'],
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
'duration': parse_duration(video_info.get('segmentDur')),
'formats': f4m_formats,
}

Loading…
Cancel
Save