[vimeo] add an extractor for channels

12 years ago · caeefc29eb
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from youtube_dl.extractor import DailymotionPlaylistIE
 from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
 from youtube_dl.utils import *

 from helper import FakeYDL
@ -26,5 +26,13 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'SPORT')
        self.assertTrue(len(result['entries']) > 20)

    def test_vimeo_channel(self):
        """Extract a Vimeo channel URL and check the resulting playlist."""
        extractor = VimeoChannelIE(FakeYDL())
        playlist = extractor.extract('http://vimeo.com/channels/tributes')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'Vimeo Tributes')
        # The channel is expected to list more than 24 videos.
        entry_count = len(playlist['entries'])
        self.assertTrue(entry_count > 24)

 # Run the test suite when this file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -71,7 +71,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -1,5 +1,6 @@
 import json
 import re
 import itertools

 from .common import InfoExtractor
 from ..utils import (
@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
            'thumbnail':    video_thumbnail,
            'description':  video_description,
        }]


 class VimeoChannelIE(InfoExtractor):
    """Information extractor for Vimeo channel URLs (vimeo.com/channels/<id>).

    Downloads the channel's video listing page by page and returns a
    playlist whose entries are URL results for the individual videos.
    """
    IE_NAME = u'vimeo:channel'
    # The dot of the host name is escaped so that only a literal '.' matches.
    # An unescaped '.' followed by '\c' is an unknown escape that Python 3's
    # re module rejects with re.error.
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
    # A rel="next" link on a listing page means another page follows.
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'

    def _real_extract(self, url):
        """Return a playlist dict with one entry per video found in the channel.

        The channel id is taken from the 'id' group of _VALID_URL. Listing
        pages are fetched starting at page 1 until a page no longer matches
        _MORE_PAGES_INDICATOR.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        video_ids = []

        for pagenum in itertools.count(1):
            webpage = self._download_webpage(
                'http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
                channel_id, u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            # Stop as soon as the current page has no link to a next page.
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
        # The title is read from the last page downloaded. The channel id comes
        # from the URL, so it is escaped before being placed into the pattern.
        channel_title = self._html_search_regex(
            r'<a href="/channels/%s">(.*?)</a>' % re.escape(channel_id),
            webpage, u'channel title')
        return {'_type': 'playlist',
                'id': channel_id,
                'title': channel_title,
                'entries': entries,
                }