You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

45 lines
1.6 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. parse_iso8601,
  5. int_or_none,
  6. )
  7. class DiscoveryIE(InfoExtractor):
  8. _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
  9. _TEST = {
  10. 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
  11. 'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
  12. 'info_dict': {
  13. 'id': 'mission-impossible-outtakes',
  14. 'ext': 'flv',
  15. 'title': 'Mission Impossible Outtakes',
  16. 'description': ('Watch Jamie Hyneman and Adam Savage practice being'
  17. ' each other -- to the point of confusing Jamie\'s dog -- and '
  18. 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
  19. ' back.'),
  20. 'duration': 156,
  21. 'timestamp': 1303099200,
  22. 'upload_date': '20110418',
  23. },
  24. }
  25. def _real_extract(self, url):
  26. video_id = self._match_id(url)
  27. webpage = self._download_webpage(url, video_id)
  28. info = self._parse_json(self._search_regex(
  29. r'(?s)<script type="application/ld\+json">(.*?)</script>',
  30. webpage, 'video info'), video_id)
  31. return {
  32. 'id': video_id,
  33. 'title': info['name'],
  34. 'url': info['contentURL'],
  35. 'description': info.get('description'),
  36. 'thumbnail': info.get('thumbnailUrl'),
  37. 'timestamp': parse_iso8601(info.get('uploadDate')),
  38. 'duration': int_or_none(info.get('duration')),
  39. }