[bandcamp] add support for albums (reported in #1270)

11 years ago · 0980426559
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -22,6 +22,7 @@ from youtube_dl.extractor import (
    LivestreamIE,
    NHLVideocenterIE,
    BambuserChannelIE,
    BandcampAlbumIE
 )


@@ -103,5 +104,13 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'pixelversity')
        self.assertTrue(len(result['entries']) >= 66)

    def test_bandcamp_album(self):
        # Run the album extractor on a known album URL and check that the
        # result is a playlist with the expected title and at least 4 entries.
        ydl = FakeYDL()
        extractor = BandcampAlbumIE(ydl)
        playlist = extractor.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'Nightmare Night EP')
        # Lower bound only, so the check tolerates tracks being added later.
        track_count = len(playlist['entries'])
        self.assertTrue(track_count >= 4)

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -11,7 +11,7 @@ from .arte import (
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -3,11 +3,13 @@ import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    ExtractorError,
 )


 class BandcampIE(InfoExtractor):
    IE_NAME = u'Bandcamp'
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
    _TEST = {
        u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
@@ -61,3 +63,25 @@ class BandcampIE(InfoExtractor):
                      }

        return [track_info]


 class BandcampAlbumIE(InfoExtractor):
    """Extractor for Bandcamp album pages.

    Produces a playlist whose entries are URL results, one per track link
    found on the album page.
    """
    IE_NAME = u'Bandcamp:album'
    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'

    def _real_extract(self, url):
        """Build a playlist dict for the album at ``url``.

        Raises ExtractorError when the page has no track links.
        """
        # The URL slug is only used as the identifier for the page download.
        album_slug = re.match(self._VALID_URL, url).group('title')
        page = self._download_webpage(url, album_slug)

        track_hrefs = re.findall(r'<a href="(.*?)" itemprop="url">', page)
        if not track_hrefs:
            raise ExtractorError(u'The page doesn\'t contain any track')

        # Resolve every href against the album URL and tag each entry with
        # BandcampIE's key.
        playlist_entries = []
        for href in track_hrefs:
            track_url = compat_urlparse.urljoin(url, href)
            playlist_entries.append(
                self.url_result(track_url, ie=BandcampIE.ie_key()))

        # The displayed album title comes from the page, not the URL slug.
        album_title = self._search_regex(r'album_title : "(.*?)"', page, u'title')
        return {
            '_type': 'playlist',
            'title': album_title,
            'entries': playlist_entries,
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -199,7 +199,8 @@ class GenericIE(InfoExtractor):
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
            burl = unescapeHTML(mobj.group(1))
            return self.url_result(burl, 'Bandcamp')
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)