Browse Source

Merge pull request #2041 from dstftw/imdb-list

[imdb] Add support for IMDb list (#2033)
totalwebcasting
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
4fb757d1e0
3 changed files with 44 additions and 2 deletions
  1. +11
    -1
      test/test_playlists.py
  2. +4
    -1
      youtube_dl/extractor/__init__.py
  3. +29
    -0
      youtube_dl/extractor/imdb.py

+ 11
- 1
test/test_playlists.py View File

@ -28,7 +28,8 @@ from youtube_dl.extractor import (
BandcampAlbumIE, BandcampAlbumIE,
SmotriCommunityIE, SmotriCommunityIE,
SmotriUserIE, SmotriUserIE,
IviCompilationIE
IviCompilationIE,
ImdbListIE,
) )
@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'dezhurnyi_angel/season2') self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20) self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
    # Runs ImdbListIE against a known public list and checks the playlist
    # metadata. NOTE(review): presumably this goes through the real
    # extractor download path, so it needs network access - confirm.
    expected_id = u'sMjedvGDd8U'
    extractor = ImdbListIE(FakeYDL())
    playlist = extractor.extract('http://www.imdb.com/list/sMjedvGDd8U')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], expected_id)
    self.assertEqual(playlist['title'], u'Animated and Family Films')
    # Lower bound only: the assertion tolerates the list growing.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 48)
if __name__ == '__main__': if __name__ == '__main__':


+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -80,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE


+ 29
- 0
youtube_dl/extractor/imdb.py View File

@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr, 'description': descr,
'thumbnail': format_info['slate'], 'thumbnail': format_info['slate'],
} }
class ImdbListIE(InfoExtractor):
    """Extractor that turns an IMDb list URL into a playlist of IMDb videos."""

    IE_NAME = u'imdb:list'
    IE_DESC = u'Internet Movie Database lists'
    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'

    def _real_extract(self, url):
        """Build a playlist result for the IMDb list addressed by ``url``.

        The list title is taken from the list's RSS feed and the entries
        from its CSV export. Only rows whose second column is an id
        starting with ``vi`` become playlist entries.
        """
        list_id = re.match(self._VALID_URL, url).group('id')

        # The RSS XML is sometimes malformed, so it is fetched as plain text
        # and the title is pulled out with a regex instead of an XML parser.
        rss_url = 'http://rss.imdb.com/list/%s' % list_id
        rss = self._download_webpage(rss_url, list_id, u'Downloading list RSS')
        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')

        # The export does not depend on the real author_id, but it answers
        # 404 when no author_id is given at all; a dummy value is enough.
        export_url = 'http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id
        export = self._download_webpage(export_url, list_id, u'Downloading list CSV')

        # Drop the header row, split every remaining row on commas and keep
        # only rows that actually have a second column.
        rows = (line.split(',') for line in export.split('\n')[1:])
        # The second column holds the item id with one wrapping character on
        # each side (presumably double quotes), which is sliced off.
        item_ids = (row[1][1:-1] for row in rows if len(row) >= 2)

        entries = [
            self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')
            for item_id in item_ids
            if item_id.startswith('vi')
        ]
        return self.playlist_result(entries, list_id, list_title)

Loading…
Cancel
Save