Browse Source

[limelight] Improve embeds extraction (closes #12761)

* Move extraction code to extractor
* Add extraction for LimelightEmbeddedPlayerFlash embeds
* Extract multiple videos
master-ytdl-org
Sergey M․ 8 years ago
parent
commit
e5d39886ec
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 43 additions and 0 deletions
  1. +6
    -0
      youtube_dl/extractor/generic.py
  2. +37
    -0
      youtube_dl/extractor/limelight.py

+ 6
- 0
youtube_dl/extractor/generic.py View File

@ -85,6 +85,7 @@ from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import OpenloadIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2483,6 +2484,11 @@ class GenericIE(InfoExtractor):
return self.url_result(piksel_url, PikselIE.ie_key()) return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds # Look for Limelight embeds
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
if limelight_urls:
return self.playlist_result(
limelight_urls, video_id, video_title, video_description)
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj: if mobj:
lm = { lm = {


+ 37
- 0
youtube_dl/extractor/limelight.py View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
smuggle_url,
unsmuggle_url, unsmuggle_url,
ExtractorError, ExtractorError,
) )
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod
def _extract_urls(cls, webpage, source_url):
    """Return a list of url_result entries for Limelight embeds in webpage.

    Two kinds of embeds are recognized:

    * JavaScript ``LimelightPlayer.doLoadMedia/Channel/ChannelList(...)``
      calls, each yielding a ``limelight:<kind>:<id>`` URL;
    * ``<object>`` tags whose class contains ``LimelightEmbeddedPlayerFlash``
      and whose ``flashVars`` param carries a ``mediaId``, each yielding
      a ``limelight:media:<id>`` URL.

    Every URL is smuggled with ``{'source_url': source_url}`` so the
    embedding page's address travels with it (presumably consumed by the
    Limelight extractors as a referer -- confirm against their
    ``_real_extract``).

    An empty list is returned when nothing matches.
    """
    # Maps the doLoad* suffix found in the page to the URL scheme component.
    lm = {
        'Media': 'media',
        'Channel': 'channel',
        'ChannelList': 'channel_list',
    }
    smuggled_data = {'source_url': source_url}
    entries = []

    # re.findall returns (kind, id) tuples because the pattern has exactly
    # two capturing groups.
    for kind, video_id in re.findall(
            r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
            webpage):
        entries.append(cls.url_result(
            smuggle_url(
                'limelight:%s:%s' % (lm[kind], video_id),
                smuggled_data),
            'Limelight%s' % kind, video_id))

    for mobj in re.finditer(
            # As per [1] class attribute should be exactly equal to
            # LimelightEmbeddedPlayerFlash but numerous examples seen
            # that don't exactly match it (e.g. [2]).
            # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
            # 2. http://www.sedona.com/FacilitatorTraining2017
            r'''(?sx)
                <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
                    <param[^>]+
                        name=(["\'])flashVars\2[^>]+
                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
            ''', webpage):
        video_id = mobj.group('id')
        entries.append(cls.url_result(
            smuggle_url(
                'limelight:media:%s' % video_id,
                smuggled_data),
            'LimelightMedia', video_id))

    return entries
def _call_playlist_service(self, item_id, method, fatal=True, referer=None): def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
headers = {} headers = {}
if referer: if referer:


Loading…
Cancel
Save