You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

70 lines
2.0 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. parse_duration,
  6. unified_strdate,
  7. )
  8. class HuffPostIE(InfoExtractor):
  9. IE_DESC = 'Huffington Post'
  10. _VALID_URL = r'''(?x)
  11. https?://(embed\.)?live\.huffingtonpost\.com/
  12. (?:
  13. r/segment/[^/]+/|
  14. HPLEmbedPlayer/\?segmentId=
  15. )
  16. (?P<id>[0-9a-f]+)'''
  17. _TEST = {
  18. 'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
  19. 'file': '52dd3e4b02a7602131000677.mp4',
  20. 'md5': 'TODO',
  21. 'info_dict': {
  22. 'title': 'TODO',
  23. 'description': 'TODO',
  24. 'duration': 1549,
  25. }
  26. }
  27. def _real_extract(self, url):
  28. mobj = re.match(self._VALID_URL, url)
  29. video_id = mobj.group('id')
  30. api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
  31. data = self._download_json(api_url, video_id)['data']
  32. video_title = data['title']
  33. duration = parse_duration(data['running_time'])
  34. upload_date = unified_strdate(data['schedule']['started_at'])
  35. thumbnails = []
  36. for url in data['images'].values():
  37. m = re.match('.*-([0-9]+x[0-9]+)\.', url)
  38. if not m:
  39. continue
  40. thumbnails.append({
  41. 'url': url,
  42. 'resolution': m.group(1),
  43. })
  44. formats = [{
  45. 'format': key,
  46. 'format_id': key.replace('/', '.'),
  47. 'ext': 'mp4',
  48. 'url': url,
  49. 'vcodec': 'none' if key.startswith('audio/') else None,
  50. } for key, url in data['sources']['live'].items()]
  51. self._sort_formats(formats)
  52. return {
  53. 'id': video_id,
  54. 'title': video_title,
  55. 'formats': formats,
  56. 'duration': duration,
  57. 'upload_date': upload_date,
  58. 'thumbnails': thumbnails,
  59. }