Browse Source

[generic] Add support for multiple brightcove URLs (Fixes #2283)

totalwebcasting
Philipp Hagemeister 11 years ago
parent
commit
99877772d0
3 changed files with 36 additions and 12 deletions
  1. +11
    -0
      test/test_playlists.py
  2. +11
    -8
      youtube_dl/extractor/brightcove.py
  3. +14
    -4
      youtube_dl/extractor/generic.py

+ 11
- 0
test/test_playlists.py View File

@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE, RutubeChannelIE,
GenericIE,
) )
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409') self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34) self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

+ 11
- 8
youtube_dl/extractor/brightcove.py View File

@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None
"""Try to extract the brightcove url from the webpage, returns None
if it can't be found if it can't be found
""" """
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m: if url_m:
return url_m.group(1)
return [url_m.group(1)]
m_brightcove = re.search(
matches = re.findall(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
if m_brightcove is not None:
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})


+ 14
- 4
youtube_dl/extractor/generic.py View File

@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is not None:
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.') self.to_screen('Brightcove video detected.')
surl = smuggle_url(bc_url, {'Referer': url})
return self.url_result(surl, 'Brightcove')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
} for bc_url in bc_urls]
return {
'_type': 'playlist',
'title': video_title,
'id': video_id,
'entries': entries,
}
# Look for embedded (iframe) Vimeo player # Look for embedded (iframe) Vimeo player
mobj = re.search( mobj = re.search(


Loading…
Cancel
Save