You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.9 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. import os
  4. from .common import InfoExtractor
  5. class PyvideoIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
  7. _TESTS = [
  8. {
  9. 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
  10. 'md5': '520915673e53a5c5d487c36e0c4d85b5',
  11. 'info_dict': {
  12. 'id': '24_4WWkSmNo',
  13. 'ext': 'webm',
  14. 'title': 'Become a logging expert in 30 minutes',
  15. 'description': 'md5:9665350d466c67fb5b1598de379021f7',
  16. 'upload_date': '20130320',
  17. 'uploader': 'Next Day Video',
  18. 'uploader_id': 'NextDayVideo',
  19. },
  20. 'add_ie': ['Youtube'],
  21. },
  22. {
  23. 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
  24. 'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
  25. 'info_dict': {
  26. 'id': '2542',
  27. 'ext': 'm4v',
  28. 'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
  29. },
  30. },
  31. ]
  32. def _real_extract(self, url):
  33. mobj = re.match(self._VALID_URL, url)
  34. video_id = mobj.group('id')
  35. webpage = self._download_webpage(url, video_id)
  36. m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
  37. if m_youtube is not None:
  38. return self.url_result(m_youtube.group(1), 'Youtube')
  39. title = self._html_search_regex(
  40. r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
  41. webpage, 'title', flags=re.DOTALL)
  42. video_url = self._search_regex(
  43. [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
  44. webpage, 'video url', flags=re.DOTALL)
  45. return {
  46. 'id': video_id,
  47. 'title': os.path.splitext(title)[0],
  48. 'url': video_url,
  49. }