You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

41 lines
1.5 KiB

  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. )
  6. class TumblrIE(InfoExtractor):
  7. _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
  8. def _real_extract(self, url):
  9. m_url = re.match(self._VALID_URL, url)
  10. video_id = m_url.group('id')
  11. blog = m_url.group('blog_name')
  12. url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
  13. webpage = self._download_webpage(url, video_id)
  14. re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
  15. video = re.search(re_video, webpage)
  16. if video is None:
  17. raise ExtractorError(u'Unable to extract video')
  18. video_url = video.group('video_url')
  19. ext = video.group('ext')
  20. video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
  21. webpage, u'thumbnail', fatal=False) # We pick the first poster
  22. if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
  23. # The only place where you can get a title, it's not complete,
  24. # but searching in other places doesn't work for all videos
  25. video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
  26. webpage, u'title', flags=re.DOTALL)
  27. return [{'id': video_id,
  28. 'url': video_url,
  29. 'title': video_title,
  30. 'thumbnail': video_thumbnail,
  31. 'ext': ext
  32. }]