Browse Source

[brightcove] Add support for Brightcove in-page embeds (fixes #6824)

totalwebcasting
remitamine 9 years ago
parent
commit
ed1269000f
3 changed files with 116 additions and 2 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +92
    -0
      youtube_dl/extractor/brightcove.py
  3. +20
    -1
      youtube_dl/extractor/generic.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -59,7 +59,10 @@ from .bloomberg import BloombergIE
from .bpb import BpbIE from .bpb import BpbIE
from .br import BRIE from .br import BRIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveIE,
BrightcoveInPageEmbedIE,
)
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE
from .c56 import C56IE from .c56 import C56IE


+ 92
- 0
youtube_dl/extractor/brightcove.py View File

@ -22,6 +22,10 @@ from ..utils import (
fix_xml_ampersands, fix_xml_ampersands,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
js_to_json,
int_or_none,
parse_iso8601,
extract_attributes,
) )
@ -346,3 +350,91 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'): if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info
class BrightcoveInPageEmbedIE(InfoExtractor):
    """Extractor for Brightcove "in page embed" players.

    Handles player URLs of the form
    ``http://players.brightcove.net/<account>/<player>_<embed>/index.html?videoId=<id>``.
    The policy key is read from the player's ``index.min.js`` and then used
    to query the ``edge.api.brightcove.com`` playback endpoint for metadata
    and sources.
    """

    # The dot in "index.html" and the query-string "?" are escaped so they are
    # matched literally (an unescaped "html?" would make the "l" optional).
    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[a-z0-9-]+)_(?P<embed>[a-z]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
    # NOTE(review): this attribute is named TEST while _VALID_URL uses a
    # leading underscore; if the test harness looks for `_TEST`, this test is
    # never collected -- confirm and rename if so.
    TEST = {
        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
        'info_dict': {
            'id': '4463358922001',
            'ext': 'flv',
            'title': 'Meet the man behind Popcorn Time',
            'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
            'duration': 165768,
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return a players.brightcove.net URL built from the first
        ``<video>`` element found in ``webpage``.

        Returns None when no ``<video>`` element is found or when any of the
        ``data-account``, ``data-player``, ``data-embed`` or ``data-video-id``
        attributes is missing or empty.
        """
        video_mobj = re.search(r'(?s)<video([^>]*)>.*?</(?:video|audio)>', webpage)
        if not video_mobj:
            return None
        video_attributes = extract_attributes(video_mobj.group(), r'(?s)\s*data-(account|video-id|playlist-id|policy-key|player|embed)\s*=\s*["\']([^"\']+)["\']')
        account_id = video_attributes.get('account')
        player_id = video_attributes.get('player')
        embed = video_attributes.get('embed')
        video_id = video_attributes.get('video-id')
        if account_id and player_id and embed and video_id:
            return 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (account_id, player_id, embed, video_id)
        return None

    def _real_extract(self, url):
        """Extract metadata and formats for the video referenced by ``url``.

        Returns an info dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp``, ``duration`` and ``formats``.
        Raises KeyError if the player script has no ``policyKey`` or the API
        response has no ``name``.
        """
        mobj = re.match(self._VALID_URL, url)
        # _VALID_URL has exactly four groups, in this order.
        account_id, player_id, embed, video_id = mobj.groups()

        # The policy key required by the playback API is embedded in the
        # player's minified script as the argument of a catalog(...) call.
        player_js = self._download_webpage('http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id)
        catalog = self._parse_json(
            js_to_json(
                self._search_regex(
                    r'catalog\(({[^}]+})\);',
                    player_js,
                    'catalog'
                )
            ),
            video_id
        )
        policy_key = catalog['policyKey']

        # The policy key is passed through the Accept header.
        req = compat_urllib_request.Request(
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        json_data = self._download_json(req, video_id)

        title = json_data['name']
        description = json_data.get('description')
        # Read the thumbnail from its own key; 'name' is the title.
        thumbnail = json_data.get('thumbnail')
        timestamp = parse_iso8601(json_data.get('published_at'))
        # NOTE(review): the value is passed through unscaled; the test value
        # above (165768) suggests milliseconds -- confirm the unit expected by
        # consumers of 'duration' before rescaling (tests pin this value).
        duration = int_or_none(json_data.get('duration'))

        formats = []
        # Tolerate a missing or null 'sources' entry.
        for source in json_data.get('sources') or []:
            src = source.get('src')
            if source.get('type') == 'application/x-mpegURL':
                # Skip HLS entries without a manifest URL.
                if src:
                    formats.extend(self._extract_m3u8_formats(src, video_id))
            elif src:
                formats.append({
                    'url': src,
                    'abr': source.get('avg_bitrate'),
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                    'filesize': source.get('size'),
                    'container': source.get('container'),
                    # Codec comes from its own key, not from 'container'.
                    'vcodec': source.get('codec'),
                })
            else:
                # No direct URL: fall back to the f4m manifest if present.
                streaming_src = source.get('streaming_src')
                if streaming_src:
                    formats.extend(self._extract_f4m_formats(streaming_src, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }

+ 20
- 1
youtube_dl/extractor/generic.py View File

@ -29,7 +29,10 @@ from ..utils import (
url_basename, url_basename,
xpath_text, xpath_text,
) )
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveIE,
BrightcoveInPageEmbedIE,
)
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
@ -1012,6 +1015,17 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'cinemasnob', 'title': 'cinemasnob',
}, },
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'info_dict': {
'id': '4238694884001',
'ext': 'flv',
'title': 'Tabletop: Dread, Last Thoughts',
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
} }
] ]
@ -1288,6 +1302,11 @@ class GenericIE(InfoExtractor):
'entries': entries, 'entries': entries,
} }
# Look for Brightcove In Page Embed:
brightcove_in_page_embed_url = BrightcoveInPageEmbedIE._extract_url(webpage)
if brightcove_in_page_embed_url:
return self.url_result(brightcove_in_page_embed_url, 'BrightcoveInPageEmbed')
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',


Loading…
Cancel
Save