Browse Source

[kaltura] Add support for multiple embeds on a webpage (closes #25523)

master
Sergey M․ 5 years ago
parent
commit
562de77f41
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 28 additions and 9 deletions
  1. +15
    -3
      youtube_dl/extractor/generic.py
  2. +13
    -6
      youtube_dl/extractor/kaltura.py

+ 15
- 3
youtube_dl/extractor/generic.py View File

@ -1708,6 +1708,15 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Kaltura'], 'add_ie': ['Kaltura'],
}, },
{
# multiple kaltura embeds, nsfw
'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
'info_dict': {
'id': 'kamila-avec-video-jaime-sadomie',
'title': "Kamila avec vídeo “J'aime sadomie”",
},
'playlist_count': 8,
},
{ {
# Non-standard Vimeo embed # Non-standard Vimeo embed
'url': 'https://openclassrooms.com/courses/understanding-the-web', 'url': 'https://openclassrooms.com/courses/understanding-the-web',
@ -2844,9 +2853,12 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks') return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds # Look for Kaltura embeds
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
kaltura_urls = KalturaIE._extract_urls(webpage)
if kaltura_urls:
return self.playlist_from_matches(
kaltura_urls, video_id, video_title,
getter=lambda x: smuggle_url(x, {'source_url': url}),
ie=KalturaIE.ie_key())
# Look for EaglePlatform embeds # Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage) eagleplatform_url = EaglePlatformIE._extract_url(webpage)


+ 13
- 6
youtube_dl/extractor/kaltura.py View File

@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
urls = KalturaIE._extract_urls(webpage)
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
mobj = (
re.search(
finditer = (
re.finditer(
r"""(?xs) r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\( kWidget\.(?:thumb)?[Ee]mbed\(
\{.*? \{.*?
@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor):
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) """, webpage)
or re.search(
or re.finditer(
r'''(?xs) r'''(?xs)
(?P<q1>["']) (?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor):
) )
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage) ''', webpage)
or re.search(
or re.finditer(
r'''(?xs) r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor):
(?P=q1) (?P=q1)
''', webpage) ''', webpage)
) )
if mobj:
urls = []
for mobj in finditer:
embed_info = mobj.groupdict() embed_info = mobj.groupdict()
for k, v in embed_info.items(): for k, v in embed_info.items():
if v: if v:
@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor):
webpage) webpage)
if service_mobj: if service_mobj:
url = smuggle_url(url, {'service_url': service_mobj.group('id')}) url = smuggle_url(url, {'service_url': service_mobj.group('id')})
return url
urls.append(url)
return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0] params = actions[0]


Loading…
Cancel
Save