You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
2.3 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. parse_duration,
  5. parse_iso8601,
  6. )
  7. from ..compat import compat_str
  8. class DiscoveryIE(InfoExtractor):
  9. _VALID_URL = r'https?://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
  10. _TESTS = [{
  11. 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
  12. 'info_dict': {
  13. 'id': '20769',
  14. 'ext': 'mp4',
  15. 'title': 'Mission Impossible Outtakes',
  16. 'description': ('Watch Jamie Hyneman and Adam Savage practice being'
  17. ' each other -- to the point of confusing Jamie\'s dog -- and '
  18. 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
  19. ' back.'),
  20. 'duration': 156,
  21. 'timestamp': 1303099200,
  22. 'upload_date': '20110418',
  23. },
  24. 'params': {
  25. 'skip_download': True, # requires ffmpeg
  26. }
  27. }, {
  28. 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
  29. 'info_dict': {
  30. 'id': 'mythbusters-the-simpsons',
  31. 'title': 'MythBusters: The Simpsons',
  32. },
  33. 'playlist_count': 9,
  34. }]
  35. def _real_extract(self, url):
  36. video_id = self._match_id(url)
  37. info = self._download_json(url + '?flat=1', video_id)
  38. video_title = info.get('playlist_title') or info.get('video_title')
  39. entries = [{
  40. 'id': compat_str(video_info['id']),
  41. 'formats': self._extract_m3u8_formats(
  42. video_info['src'], video_id, ext='mp4',
  43. note='Download m3u8 information for video %d' % (idx + 1)),
  44. 'title': video_info['title'],
  45. 'description': video_info.get('description'),
  46. 'duration': parse_duration(video_info.get('video_length')),
  47. 'webpage_url': video_info.get('href'),
  48. 'thumbnail': video_info.get('thumbnailURL'),
  49. 'alt_title': video_info.get('secondary_title'),
  50. 'timestamp': parse_iso8601(video_info.get('publishedDate')),
  51. } for idx, video_info in enumerate(info['playlist'])]
  52. return self.playlist_result(entries, video_id, video_title)