You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

52 lines
2.4 KiB

  1. import re
  2. import json
  3. from .common import InfoExtractor
  4. from ..utils import unescapeHTML
  class OoyalaIE(InfoExtractor):
      """Extractor for videos referenced by an ooyala.com embedCode URL."""

      # The video identifier is the value of the embedCode query parameter.
      _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
      _TEST = {
          # Embed taken from this Slashdot story:
          # http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
          u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
          u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
          u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
          u'info_dict': {
              u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
              u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
          },
      }

      def _extract_result(self, info, more_info):
          """Assemble the result dictionary for one video.

          info must provide the 'embedCode', 'title' and 'url' keys;
          more_info must provide 'description' and 'promo'.  The title and
          the description are passed through unescapeHTML, and 'promo' is
          reported as the thumbnail.  The extension is always 'mp4'.
          """
          video_id = info['embedCode']
          title = unescapeHTML(info['title'])
          video_url = info['url']
          description = unescapeHTML(more_info['description'])
          thumbnail = more_info['promo']
          return {
              'id': video_id,
              'ext': 'mp4',
              'title': title,
              'url': video_url,
              'description': description,
              'thumbnail': thumbnail,
          }

      def _real_extract(self, url):
          """Extract the video(s) referenced by the embed code in url.

          Downloads the player script, follows the mobile player URL found
          in it, and decodes the two JSON blobs embedded in the mobile
          player page.  Returns a playlist dictionary when the second blob
          has a non-empty 'lineup' entry, otherwise the result for the first
          (index 0) entry of the first blob.
          """
          embed_code = re.match(self._VALID_URL, url).group('id')

          player_js = self._download_webpage(
              'http://player.ooyala.com/player.js?embedCode=%s' % embed_code,
              embed_code)
          mobile_url = self._search_regex(
              r'mobile_player_url="(.+?)&device="',
              player_js, u'mobile player url')
          mobile_page = self._download_webpage(mobile_url, embed_code)

          # Both blobs sit inside a quoted eval("(...)") argument, so their
          # double quotes arrive backslash-escaped; strip that escaping
          # before handing the text to the JSON parser.
          streams_text = self._search_regex(
              r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);',
              mobile_page, u'info')
          streams_text = streams_text.replace('\\"', '"')
          details_text = self._search_regex(
              r'eval\("\(({.*?\\"promo\\".*?})\)"',
              mobile_page, u'more info')
          details_text = details_text.replace('\\"', '"')

          streams = json.loads(streams_text)
          details = json.loads(details_text)

          lineup = details.get('lineup')
          if not lineup:
              # No lineup: treat the page as a single video.
              return self._extract_result(streams[0], details)

          # Pair each stream with the lineup item at the same position.
          entries = []
          for stream, item in zip(streams, lineup):
              entries.append(self._extract_result(stream, item))
          return {
              '_type': 'playlist',
              'id': embed_code,
              'title': unescapeHTML(details['title']),
              'entries': entries,
          }